1#undef DEBUG
2
3/*
4 * ARM performance counter support.
5 *
6 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7 *
8 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9 * 2010 (c) MontaVista Software, LLC.
10 *
11 * This code is based on the sparc64 perf event code, which is in turn based
12 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
13 * code.
14 */
15#define pr_fmt(fmt) "hw perfevents: " fmt
16
17#include <linux/interrupt.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/perf_event.h>
21#include <linux/platform_device.h>
22#include <linux/spinlock.h>
23#include <linux/uaccess.h>
24
25#include <asm/cputype.h>
26#include <asm/irq.h>
27#include <asm/irq_regs.h>
28#include <asm/pmu.h>
29#include <asm/stacktrace.h>
30
31static struct platform_device *pmu_device;
32
33/*
34 * Hardware lock to serialize accesses to PMU registers. Needed for the
35 * read/modify/write sequences.
36 */
37DEFINE_SPINLOCK(pmu_lock);
38
39/*
40 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
41 * another platform that supports more, we need to increase this to be the
42 * largest of all platforms.
43 *
44 * ARMv7 supports up to 32 events:
45 *  cycle counter CCNT + 31 event counters CNT0..30.
46 *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
47 */
48#define ARMPMU_MAX_HWEVENTS		33
49
50/* The events for a given CPU. */
51struct cpu_hw_events {
52	/*
53	 * The events that are active on the CPU for the given index. Index 0
54	 * is reserved.
55	 */
56	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
57
58	/*
59	 * A 1 bit for an index indicates that the counter is being used for
60	 * an event. A 0 means that the counter can be used.
61	 */
62	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
63
64	/*
65	 * A 1 bit for an index indicates that the counter is actively being
66	 * used.
67	 */
68	unsigned long		active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69};
70DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72/* PMU names. */
73static const char *arm_pmu_names[] = {
74	[ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75	[ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76	[ARM_PERF_PMU_ID_V6]	  = "v6",
77	[ARM_PERF_PMU_ID_V6MP]	  = "v6mpcore",
78	[ARM_PERF_PMU_ID_CA8]	  = "ARMv7 Cortex-A8",
79	[ARM_PERF_PMU_ID_CA9]	  = "ARMv7 Cortex-A9",
80};
81
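/*
 * Per-implementation PMU operations and properties. The appropriate
 * structure is selected at runtime according to the CPU type (see the
 * armpmu pointer below).
 */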
82struct arm_pmu {
83	enum arm_perf_pmu_ids id;
84	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
85	void		(*enable)(struct hw_perf_event *evt, int idx);
86	void		(*disable)(struct hw_perf_event *evt, int idx);
87	int		(*event_map)(int evt);
88	u64		(*raw_event)(u64);
89	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
90					 struct hw_perf_event *hwc);
91	u32		(*read_counter)(int idx);
92	void		(*write_counter)(int idx, u32 val);
93	void		(*start)(void);
94	void		(*stop)(void);
95	int		num_events;
96	u64		max_period;
97};
98
99/* Set at runtime when we know what CPU type we are. */
100static const struct arm_pmu *armpmu;
101
102enum arm_perf_pmu_ids
103armpmu_get_pmu_id(void)
104{
105	int id = -ENODEV;
106
107	if (armpmu != NULL)
108		id = armpmu->id;
109
110	return id;
111}
112EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
113
114int
115armpmu_get_max_events(void)
116{
117	int max_events = 0;
118
119	if (armpmu != NULL)
120		max_events = armpmu->num_events;
121
122	return max_events;
123}
124EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125
126#define HW_OP_UNSUPPORTED		0xFFFF
127
128#define C(_x) \
129	PERF_COUNT_HW_CACHE_##_x
130
131#define CACHE_OP_UNSUPPORTED		0xFFFF
132
133static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
134				     [PERF_COUNT_HW_CACHE_OP_MAX]
135				     [PERF_COUNT_HW_CACHE_RESULT_MAX];
136
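/*
 * A PERF_TYPE_HW_CACHE config packs the cache type, operation and result
 * into one byte each; decode it and look the event up in the CPU's cache
 * map.
 */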
137static int
138armpmu_map_cache_event(u64 config)
139{
140	unsigned int cache_type, cache_op, cache_result, ret;
141
142	cache_type = (config >>  0) & 0xff;
143	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
144		return -EINVAL;
145
146	cache_op = (config >>  8) & 0xff;
147	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
148		return -EINVAL;
149
150	cache_result = (config >> 16) & 0xff;
151	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
152		return -EINVAL;
153
154	ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
155
156	if (ret == CACHE_OP_UNSUPPORTED)
157		return -ENOENT;
158
159	return ret;
160}
161
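/*
 * Program the counter with the two's complement of the remaining sample
 * period (clamped to the PMU's max_period) so that it overflows after the
 * requested number of events. Returns non-zero when a new period begins.
 */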
162static int
163armpmu_event_set_period(struct perf_event *event,
164			struct hw_perf_event *hwc,
165			int idx)
166{
167	s64 left = local64_read(&hwc->period_left);
168	s64 period = hwc->sample_period;
169	int ret = 0;
170
171	if (unlikely(left <= -period)) {
172		left = period;
173		local64_set(&hwc->period_left, left);
174		hwc->last_period = period;
175		ret = 1;
176	}
177
178	if (unlikely(left <= 0)) {
179		left += period;
180		local64_set(&hwc->period_left, left);
181		hwc->last_period = period;
182		ret = 1;
183	}
184
185	if (left > (s64)armpmu->max_period)
186		left = armpmu->max_period;
187
188	local64_set(&hwc->prev_count, (u64)-left);
189
190	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191
192	perf_event_update_userpage(event);
193
194	return ret;
195}
196
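/*
 * Fold the latest hardware counter value into the event count. The
 * cmpxchg loop guards against a racing update of prev_count (e.g. from
 * the overflow interrupt), and the shift arithmetic yields the 32-bit
 * counter delta modulo 2^32 so that counter wraparound is handled.
 */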
197static u64
198armpmu_event_update(struct perf_event *event,
199		    struct hw_perf_event *hwc,
200		    int idx)
201{
202	int shift = 64 - 32;
203	s64 prev_raw_count, new_raw_count;
204	u64 delta;
205
206again:
207	prev_raw_count = local64_read(&hwc->prev_count);
208	new_raw_count = armpmu->read_counter(idx);
209
210	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211			     new_raw_count) != prev_raw_count)
212		goto again;
213
214	delta = (new_raw_count << shift) - (prev_raw_count << shift);
215	delta >>= shift;
216
217	local64_add(delta, &event->count);
218	local64_sub(delta, &hwc->period_left);
219
220	return new_raw_count;
221}
222
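/*
 * Take the event off the PMU: disable its counter, fold in the final
 * count and release the counter index.
 */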
223static void
224armpmu_disable(struct perf_event *event)
225{
226	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
227	struct hw_perf_event *hwc = &event->hw;
228	int idx = hwc->idx;
229
230	WARN_ON(idx < 0);
231
232	clear_bit(idx, cpuc->active_mask);
233	armpmu->disable(hwc, idx);
234
235	barrier();
236
237	armpmu_event_update(event, hwc, idx);
238	cpuc->events[idx] = NULL;
239	clear_bit(idx, cpuc->used_mask);
240
241	perf_event_update_userpage(event);
242}
243
244static void
245armpmu_read(struct perf_event *event)
246{
247	struct hw_perf_event *hwc = &event->hw;
248
249	/* Don't read disabled counters! */
250	if (hwc->idx < 0)
251		return;
252
253	armpmu_event_update(event, hwc, hwc->idx);
254}
255
256static void
257armpmu_unthrottle(struct perf_event *event)
258{
259	struct hw_perf_event *hwc = &event->hw;
260
261	/*
262	 * Set the period again. Some counters can't be stopped, so when we
263	 * were throttled we simply disabled the IRQ source and the counter
264	 * may have been left counting. If we don't do this step then we may
265	 * get an interrupt too soon or *way* too late if the overflow has
266	 * happened since disabling.
267	 */
268	armpmu_event_set_period(event, hwc, hwc->idx);
269	armpmu->enable(hwc, hwc->idx);
270}
271
272static int
273armpmu_enable(struct perf_event *event)
274{
275	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
276	struct hw_perf_event *hwc = &event->hw;
277	int idx;
278	int err = 0;
279
280	/* If we don't have space for the counter then finish early. */
281	idx = armpmu->get_event_idx(cpuc, hwc);
282	if (idx < 0) {
283		err = idx;
284		goto out;
285	}
286
287	/*
288	 * If there is an event in the counter we are going to use then make
289	 * sure it is disabled.
290	 */
291	event->hw.idx = idx;
292	armpmu->disable(hwc, idx);
293	cpuc->events[idx] = event;
294	set_bit(idx, cpuc->active_mask);
295
296	/* Set the period for the event. */
297	armpmu_event_set_period(event, hwc, idx);
298
299	/* Enable the event. */
300	armpmu->enable(hwc, idx);
301
302	/* Propagate our changes to the userspace mapping. */
303	perf_event_update_userpage(event);
304
305out:
306	return err;
307}
308
309static struct pmu pmu = {
310	.enable	    = armpmu_enable,
311	.disable    = armpmu_disable,
312	.unthrottle = armpmu_unthrottle,
313	.read	    = armpmu_read,
314};
315
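/*
 * Check that an event (and, in validate_group(), its entire group) can be
 * scheduled onto the PMU at once by simulating counter allocation on an
 * empty, fake cpu_hw_events structure.
 */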
316static int
317validate_event(struct cpu_hw_events *cpuc,
318	       struct perf_event *event)
319{
320	struct hw_perf_event fake_event = event->hw;
321
322	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
323		return 1;
324
325	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
326}
327
328static int
329validate_group(struct perf_event *event)
330{
331	struct perf_event *sibling, *leader = event->group_leader;
332	struct cpu_hw_events fake_pmu;
333
334	memset(&fake_pmu, 0, sizeof(fake_pmu));
335
336	if (!validate_event(&fake_pmu, leader))
337		return -ENOSPC;
338
339	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
340		if (!validate_event(&fake_pmu, sibling))
341			return -ENOSPC;
342	}
343
344	if (!validate_event(&fake_pmu, event))
345		return -ENOSPC;
346
347	return 0;
348}
349
350static int
351armpmu_reserve_hardware(void)
352{
353	int i, err = -ENODEV, irq;
354
355	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
356	if (IS_ERR(pmu_device)) {
357		pr_warning("unable to reserve pmu\n");
358		return PTR_ERR(pmu_device);
359	}
360
361	init_pmu(ARM_PMU_DEVICE_CPU);
362
363	if (pmu_device->num_resources < 1) {
364		pr_err("no irqs for PMUs defined\n");
365		return -ENODEV;
366	}
367
368	for (i = 0; i < pmu_device->num_resources; ++i) {
369		irq = platform_get_irq(pmu_device, i);
370		if (irq < 0)
371			continue;
372
373		err = request_irq(irq, armpmu->handle_irq,
374				  IRQF_DISABLED | IRQF_NOBALANCING,
375				  "armpmu", NULL);
376		if (err) {
377			pr_warning("unable to request IRQ%d for ARM perf "
378				"counters\n", irq);
379			break;
380		}
381	}
382
383	if (err) {
384		for (i = i - 1; i >= 0; --i) {
385			irq = platform_get_irq(pmu_device, i);
386			if (irq >= 0)
387				free_irq(irq, NULL);
388		}
389		release_pmu(pmu_device);
390		pmu_device = NULL;
391	}
392
393	return err;
394}
395
396static void
397armpmu_release_hardware(void)
398{
399	int i, irq;
400
401	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
402		irq = platform_get_irq(pmu_device, i);
403		if (irq >= 0)
404			free_irq(irq, NULL);
405	}
406	armpmu->stop();
407
408	release_pmu(pmu_device);
409	pmu_device = NULL;
410}
411
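/*
 * active_events counts the live perf events using the PMU. The hardware
 * (PMU device and IRQs) is reserved when the first event is created and
 * released again when the last event is destroyed.
 */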
412static atomic_t active_events = ATOMIC_INIT(0);
413static DEFINE_MUTEX(pmu_reserve_mutex);
414
415static void
416hw_perf_event_destroy(struct perf_event *event)
417{
418	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
419		armpmu_release_hardware();
420		mutex_unlock(&pmu_reserve_mutex);
421	}
422}
423
424static int
425__hw_perf_event_init(struct perf_event *event)
426{
427	struct hw_perf_event *hwc = &event->hw;
428	int mapping, err;
429
430	/* Decode the generic type into an ARM event identifier. */
431	if (PERF_TYPE_HARDWARE == event->attr.type) {
432		mapping = armpmu->event_map(event->attr.config);
433	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
434		mapping = armpmu_map_cache_event(event->attr.config);
435	} else if (PERF_TYPE_RAW == event->attr.type) {
436		mapping = armpmu->raw_event(event->attr.config);
437	} else {
438		pr_debug("event type %x not supported\n", event->attr.type);
439		return -EOPNOTSUPP;
440	}
441
442	if (mapping < 0) {
443		pr_debug("event %x:%llx not supported\n", event->attr.type,
444			 event->attr.config);
445		return mapping;
446	}
447
448	/*
449	 * Check whether we need to exclude the counter from certain modes.
450	 * The ARM performance counters are on all of the time so if someone
451	 * has asked us for some excludes then we have to fail.
452	 */
453	if (event->attr.exclude_kernel || event->attr.exclude_user ||
454	    event->attr.exclude_hv || event->attr.exclude_idle) {
455		pr_debug("ARM performance counters do not support "
456			 "mode exclusion\n");
457		return -EPERM;
458	}
459
460	/*
461	 * We don't assign an index until we actually place the event onto
462	 * hardware. Use -1 to signify that we haven't decided where to put it
463	 * yet. For SMP systems, each core has its own PMU so we can't do any
464	 * clever allocation or constraints checking at this point.
465	 */
466	hwc->idx = -1;
467
468	/*
469	 * Store the event encoding into the config_base field. config and
470	 * event_base are unused as the only 2 things we need to know are
471	 * the event mapping and the counter to use. The counter to use is
472	 * also the index and the config_base is the event type.
473	 */
474	hwc->config_base	    = (unsigned long)mapping;
475	hwc->config		    = 0;
476	hwc->event_base		    = 0;
477
478	if (!hwc->sample_period) {
479		hwc->sample_period  = armpmu->max_period;
480		hwc->last_period    = hwc->sample_period;
481		local64_set(&hwc->period_left, hwc->sample_period);
482	}
483
484	err = 0;
485	if (event->group_leader != event) {
486		err = validate_group(event);
487		if (err)
488			return -EINVAL;
489	}
490
491	return err;
492}
493
494const struct pmu *
495hw_perf_event_init(struct perf_event *event)
496{
497	int err = 0;
498
499	if (!armpmu)
500		return ERR_PTR(-ENODEV);
501
502	event->destroy = hw_perf_event_destroy;
503
504	if (!atomic_inc_not_zero(&active_events)) {
505		if (atomic_read(&active_events) > perf_max_events) {
506			atomic_dec(&active_events);
507			return ERR_PTR(-ENOSPC);
508		}
509
510		mutex_lock(&pmu_reserve_mutex);
511		if (atomic_read(&active_events) == 0) {
512			err = armpmu_reserve_hardware();
513		}
514
515		if (!err)
516			atomic_inc(&active_events);
517		mutex_unlock(&pmu_reserve_mutex);
518	}
519
520	if (err)
521		return ERR_PTR(err);
522
523	err = __hw_perf_event_init(event);
524	if (err)
525		hw_perf_event_destroy(event);
526
527	return err ? ERR_PTR(err) : &pmu;
528}
529
530void
531hw_perf_enable(void)
532{
533	/* Enable all of the perf events on hardware. */
534	int idx;
535	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
536
537	if (!armpmu)
538		return;
539
540	for (idx = 0; idx <= armpmu->num_events; ++idx) {
541		struct perf_event *event = cpuc->events[idx];
542
543		if (!event)
544			continue;
545
546		armpmu->enable(&event->hw, idx);
547	}
548
549	armpmu->start();
550}
551
552void
553hw_perf_disable(void)
554{
555	if (armpmu)
556		armpmu->stop();
557}
558
559/*
560 * ARMv6 Performance counter handling code.
561 *
562 * ARMv6 has 2 configurable performance counters and a single cycle counter.
563 * They all share a single reset bit but can be written to zero so we can use
564 * that for a reset.
565 *
566 * The counters can't be individually enabled or disabled so when we remove
567 * one event and replace it with another we could get spurious counts from the
568 * wrong event. However, we can take advantage of the fact that the
569 * performance counters can export events to the event bus, and the event bus
570 * itself can be monitored. This requires that we *don't* export the events to
571 * the event bus. The procedure for disabling a configurable counter is:
572 *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
573 *	  effectively stops the counter from counting.
574 *	- disable the counter's interrupt generation (each counter has its
575 *	  own interrupt enable bit).
576 * Once stopped, the counter value can be written as 0 to reset.
577 *
578 * To enable a counter:
579 *	- enable the counter's interrupt generation.
580 *	- set the new event type.
581 *
582 * Note: the dedicated cycle counter only counts cycles and can't be
583 * enabled/disabled independently of the others. When we want to disable the
584 * cycle counter, we have to just disable the interrupt reporting and start
585 * ignoring that counter. When re-enabling, we have to reset the value and
586 * enable the interrupt.
587 */
588
589enum armv6_perf_types {
590	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
591	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
592	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
593	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
594	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
595	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
596	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
597	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
598	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
599	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
600	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
601	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
602	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
603	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
604	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
605	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
606	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
607	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
608	ARMV6_PERFCTR_NOP		    = 0x20,
609};
610
611enum armv6_counters {
612	ARMV6_CYCLE_COUNTER = 1,
613	ARMV6_COUNTER0,
614	ARMV6_COUNTER1,
615};
616
617/*
618 * The hardware events that we support. We do support cache operations but
619 * we have Harvard caches and no way to combine instruction and data
620 * accesses/misses in hardware.
621 */
622static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
623	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6_PERFCTR_CPU_CYCLES,
624	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6_PERFCTR_INSTR_EXEC,
625	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
626	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
627	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
628	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6_PERFCTR_BR_MISPREDICT,
629	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
630};
631
632static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
633					  [PERF_COUNT_HW_CACHE_OP_MAX]
634					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
635	[C(L1D)] = {
636		/*
637		 * The performance counters don't differentiate between read
638		 * and write accesses/misses so this isn't strictly correct,
639		 * but it's the best we can do. Writes and reads get
640		 * combined.
641		 */
642		[C(OP_READ)] = {
643			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
644			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
645		},
646		[C(OP_WRITE)] = {
647			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
648			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
649		},
650		[C(OP_PREFETCH)] = {
651			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
652			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
653		},
654	},
655	[C(L1I)] = {
656		[C(OP_READ)] = {
657			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
658			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
659		},
660		[C(OP_WRITE)] = {
661			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
662			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
663		},
664		[C(OP_PREFETCH)] = {
665			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
666			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
667		},
668	},
669	[C(LL)] = {
670		[C(OP_READ)] = {
671			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
672			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
673		},
674		[C(OP_WRITE)] = {
675			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
676			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
677		},
678		[C(OP_PREFETCH)] = {
679			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
680			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
681		},
682	},
683	[C(DTLB)] = {
684		/*
685		 * The ARM performance counters can count micro DTLB misses,
686		 * micro ITLB misses and main TLB misses. There isn't an event
687		 * for TLB misses, so use the micro misses here and if users
688		 * want the main TLB misses they can use a raw counter.
689		 */
690		[C(OP_READ)] = {
691			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
692			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
693		},
694		[C(OP_WRITE)] = {
695			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
696			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
697		},
698		[C(OP_PREFETCH)] = {
699			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
700			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
701		},
702	},
703	[C(ITLB)] = {
704		[C(OP_READ)] = {
705			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
706			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
707		},
708		[C(OP_WRITE)] = {
709			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
710			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
711		},
712		[C(OP_PREFETCH)] = {
713			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
714			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
715		},
716	},
717	[C(BPU)] = {
718		[C(OP_READ)] = {
719			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
720			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
721		},
722		[C(OP_WRITE)] = {
723			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
724			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
725		},
726		[C(OP_PREFETCH)] = {
727			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
728			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
729		},
730	},
731};
732
733enum armv6mpcore_perf_types {
734	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
735	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
736	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
737	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
738	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
739	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
740	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
741	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
742	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
743	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
744	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
745	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
746	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
747	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
748	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
749	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
750	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
751	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
752	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
753	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
754};
755
756/*
757 * The hardware events that we support. We do support cache operations but
758 * we have Harvard caches and no way to combine instruction and data
759 * accesses/misses in hardware.
760 */
761static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
762	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
763	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
764	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
765	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
766	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
767	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
768	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
769};
770
771static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
772					[PERF_COUNT_HW_CACHE_OP_MAX]
773					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
774	[C(L1D)] = {
775		[C(OP_READ)] = {
776			[C(RESULT_ACCESS)]  =
777				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
778			[C(RESULT_MISS)]    =
779				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
780		},
781		[C(OP_WRITE)] = {
782			[C(RESULT_ACCESS)]  =
783				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
784			[C(RESULT_MISS)]    =
785				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
786		},
787		[C(OP_PREFETCH)] = {
788			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
789			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
790		},
791	},
792	[C(L1I)] = {
793		[C(OP_READ)] = {
794			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
795			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
796		},
797		[C(OP_WRITE)] = {
798			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
799			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
800		},
801		[C(OP_PREFETCH)] = {
802			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
803			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
804		},
805	},
806	[C(LL)] = {
807		[C(OP_READ)] = {
808			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
809			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
810		},
811		[C(OP_WRITE)] = {
812			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
813			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
814		},
815		[C(OP_PREFETCH)] = {
816			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
817			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
818		},
819	},
820	[C(DTLB)] = {
821		/*
822		 * The ARM performance counters can count micro DTLB misses,
823		 * micro ITLB misses and main TLB misses. There isn't an event
824		 * for TLB misses, so use the micro misses here and if users
825		 * want the main TLB misses they can use a raw counter.
826		 */
827		[C(OP_READ)] = {
828			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
829			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
830		},
831		[C(OP_WRITE)] = {
832			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
833			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
834		},
835		[C(OP_PREFETCH)] = {
836			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
837			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
838		},
839	},
840	[C(ITLB)] = {
841		[C(OP_READ)] = {
842			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
843			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
844		},
845		[C(OP_WRITE)] = {
846			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
847			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
848		},
849		[C(OP_PREFETCH)] = {
850			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
851			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
852		},
853	},
854	[C(BPU)] = {
855		[C(OP_READ)] = {
856			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
857			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
858		},
859		[C(OP_WRITE)] = {
860			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
861			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
862		},
863		[C(OP_PREFETCH)] = {
864			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
865			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
866		},
867	},
868};
869
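/*
 * Low-level accessors for the ARMv6 performance monitor control register
 * (PMCR), accessed through coprocessor 15 (c15, c12).
 */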
870static inline unsigned long
871armv6_pmcr_read(void)
872{
873	u32 val;
874	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
875	return val;
876}
877
878static inline void
879armv6_pmcr_write(unsigned long val)
880{
881	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
882}
883
884#define ARMV6_PMCR_ENABLE		(1 << 0)
885#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
886#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
887#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
888#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
889#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
890#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
891#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
892#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
893#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
894#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
895#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
896#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
897#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
898
899#define ARMV6_PMCR_OVERFLOWED_MASK \
900	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
901	 ARMV6_PMCR_CCOUNT_OVERFLOW)
902
903static inline int
904armv6_pmcr_has_overflowed(unsigned long pmcr)
905{
906	return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
907}
908
909static inline int
910armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
911				  enum armv6_counters counter)
912{
913	int ret = 0;
914
915	if (ARMV6_CYCLE_COUNTER == counter)
916		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
917	else if (ARMV6_COUNTER0 == counter)
918		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
919	else if (ARMV6_COUNTER1 == counter)
920		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
921	else
922		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
923
924	return ret;
925}
926
927static inline u32
928armv6pmu_read_counter(int counter)
929{
930	unsigned long value = 0;
931
932	if (ARMV6_CYCLE_COUNTER == counter)
933		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
934	else if (ARMV6_COUNTER0 == counter)
935		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
936	else if (ARMV6_COUNTER1 == counter)
937		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
938	else
939		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
940
941	return value;
942}
943
944static inline void
945armv6pmu_write_counter(int counter,
946		       u32 value)
947{
948	if (ARMV6_CYCLE_COUNTER == counter)
949		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
950	else if (ARMV6_COUNTER0 == counter)
951		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
952	else if (ARMV6_COUNTER1 == counter)
953		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
954	else
955		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
956}
957
958void
959armv6pmu_enable_event(struct hw_perf_event *hwc,
960		      int idx)
961{
962	unsigned long val, mask, evt, flags;
963
964	if (ARMV6_CYCLE_COUNTER == idx) {
965		mask	= 0;
966		evt	= ARMV6_PMCR_CCOUNT_IEN;
967	} else if (ARMV6_COUNTER0 == idx) {
968		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
969		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
970			  ARMV6_PMCR_COUNT0_IEN;
971	} else if (ARMV6_COUNTER1 == idx) {
972		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
973		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
974			  ARMV6_PMCR_COUNT1_IEN;
975	} else {
976		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
977		return;
978	}
979
980	/*
981	 * Mask out the current event and set the counter to count the event
982	 * that we're interested in.
983	 */
984	spin_lock_irqsave(&pmu_lock, flags);
985	val = armv6_pmcr_read();
986	val &= ~mask;
987	val |= evt;
988	armv6_pmcr_write(val);
989	spin_unlock_irqrestore(&pmu_lock, flags);
990}
991
992static irqreturn_t
993armv6pmu_handle_irq(int irq_num,
994		    void *dev)
995{
996	unsigned long pmcr = armv6_pmcr_read();
997	struct perf_sample_data data;
998	struct cpu_hw_events *cpuc;
999	struct pt_regs *regs;
1000	int idx;
1001
1002	if (!armv6_pmcr_has_overflowed(pmcr))
1003		return IRQ_NONE;
1004
1005	regs = get_irq_regs();
1006
1007	/*
1008	 * The interrupts are cleared by writing the overflow flags back to
1009	 * the control register. All of the other bits don't have any effect
1010	 * if they are rewritten, so write the whole value back.
1011	 */
1012	armv6_pmcr_write(pmcr);
1013
1014	perf_sample_data_init(&data, 0);
1015
1016	cpuc = &__get_cpu_var(cpu_hw_events);
1017	for (idx = 0; idx <= armpmu->num_events; ++idx) {
1018		struct perf_event *event = cpuc->events[idx];
1019		struct hw_perf_event *hwc;
1020
1021		if (!test_bit(idx, cpuc->active_mask))
1022			continue;
1023
1024		/*
1025		 * We have a single interrupt for all counters. Check that
1026		 * each counter has overflowed before we process it.
1027		 */
1028		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1029			continue;
1030
1031		hwc = &event->hw;
1032		armpmu_event_update(event, hwc, idx);
1033		data.period = event->hw.last_period;
1034		if (!armpmu_event_set_period(event, hwc, idx))
1035			continue;
1036
1037		if (perf_event_overflow(event, 0, &data, regs))
1038			armpmu->disable(hwc, idx);
1039	}
1040
1041	/*
1042	 * Handle the pending perf events.
1043	 *
1044	 * Note: this call *must* be run with interrupts disabled. For
1045	 * platforms that can have the PMU interrupts raised as an NMI, this
1046	 * will not work.
1047	 */
1048	perf_event_do_pending();
1049
1050	return IRQ_HANDLED;
1051}
1052
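/*
 * Start/stop all ARMv6 counters as a group by toggling the PMCR enable
 * bit, serialized with pmu_lock.
 */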
1053static void
1054armv6pmu_start(void)
1055{
1056	unsigned long flags, val;
1057
1058	spin_lock_irqsave(&pmu_lock, flags);
1059	val = armv6_pmcr_read();
1060	val |= ARMV6_PMCR_ENABLE;
1061	armv6_pmcr_write(val);
1062	spin_unlock_irqrestore(&pmu_lock, flags);
1063}
1064
1065void
1066armv6pmu_stop(void)
1067{
1068	unsigned long flags, val;
1069
1070	spin_lock_irqsave(&pmu_lock, flags);
1071	val = armv6_pmcr_read();
1072	val &= ~ARMV6_PMCR_ENABLE;
1073	armv6_pmcr_write(val);
1074	spin_unlock_irqrestore(&pmu_lock, flags);
1075}
1076
1077static inline int
1078armv6pmu_event_map(int config)
1079{
1080	int mapping = armv6_perf_map[config];
1081	if (HW_OP_UNSUPPORTED == mapping)
1082		mapping = -EOPNOTSUPP;
1083	return mapping;
1084}
1085
1086static inline int
1087armv6mpcore_pmu_event_map(int config)
1088{
1089	int mapping = armv6mpcore_perf_map[config];
1090	if (HW_OP_UNSUPPORTED == mapping)
1091		mapping = -EOPNOTSUPP;
1092	return mapping;
1093}
1094
1095static u64
1096armv6pmu_raw_event(u64 config)
1097{
1098	return config & 0xff;
1099}
1100
1101static int
1102armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1103		       struct hw_perf_event *event)
1104{
1105	/* Always place a cycle counter into the cycle counter. */
1106	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1107		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1108			return -EAGAIN;
1109
1110		return ARMV6_CYCLE_COUNTER;
1111	} else {
1112		/*
1113		 * For anything other than a cycle counter, try and use
1114		 * counter0 and counter1.
1115		 */
1116		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1117			return ARMV6_COUNTER1;
1118		}
1119
1120		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1121			return ARMV6_COUNTER0;
1122		}
1123
1124		/* The counters are all in use. */
1125		return -EAGAIN;
1126	}
1127}
1128
1129static void
1130armv6pmu_disable_event(struct hw_perf_event *hwc,
1131		       int idx)
1132{
1133	unsigned long val, mask, evt, flags;
1134
1135	if (ARMV6_CYCLE_COUNTER == idx) {
1136		mask	= ARMV6_PMCR_CCOUNT_IEN;
1137		evt	= 0;
1138	} else if (ARMV6_COUNTER0 == idx) {
1139		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1140		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1141	} else if (ARMV6_COUNTER1 == idx) {
1142		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1143		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1144	} else {
1145		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1146		return;
1147	}
1148
1149	/*
1150	 * Mask out the current event and set the counter to count the number
1151	 * of ETM bus signal assertion cycles. The external reporting should
1152	 * be disabled and so this should never increment.
1153	 */
1154	spin_lock_irqsave(&pmu_lock, flags);
1155	val = armv6_pmcr_read();
1156	val &= ~mask;
1157	val |= evt;
1158	armv6_pmcr_write(val);
1159	spin_unlock_irqrestore(&pmu_lock, flags);
1160}
1161
1162static void
1163armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1164			      int idx)
1165{
1166	unsigned long val, mask, flags, evt = 0;
1167
1168	if (ARMV6_CYCLE_COUNTER == idx) {
1169		mask	= ARMV6_PMCR_CCOUNT_IEN;
1170	} else if (ARMV6_COUNTER0 == idx) {
1171		mask	= ARMV6_PMCR_COUNT0_IEN;
1172	} else if (ARMV6_COUNTER1 == idx) {
1173		mask	= ARMV6_PMCR_COUNT1_IEN;
1174	} else {
1175		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1176		return;
1177	}
1178
1179	/*
1180	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1181	 * simply disable the interrupt reporting.
1182	 */
1183	spin_lock_irqsave(&pmu_lock, flags);
1184	val = armv6_pmcr_read();
1185	val &= ~mask;
1186	val |= evt;
1187	armv6_pmcr_write(val);
1188	spin_unlock_irqrestore(&pmu_lock, flags);
1189}
1190
1191static const struct arm_pmu armv6pmu = {
1192	.id			= ARM_PERF_PMU_ID_V6,
1193	.handle_irq		= armv6pmu_handle_irq,
1194	.enable			= armv6pmu_enable_event,
1195	.disable		= armv6pmu_disable_event,
1196	.event_map		= armv6pmu_event_map,
1197	.raw_event		= armv6pmu_raw_event,
1198	.read_counter		= armv6pmu_read_counter,
1199	.write_counter		= armv6pmu_write_counter,
1200	.get_event_idx		= armv6pmu_get_event_idx,
1201	.start			= armv6pmu_start,
1202	.stop			= armv6pmu_stop,
1203	.num_events		= 3,
1204	.max_period		= (1LLU << 32) - 1,
1205};
1206
1207/*
1208 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1209 * that some of the events have different enumerations and that there is no
1210 * *hack* to stop the programmable counters. To stop the counters we simply
1211 * disable the interrupt reporting and update the event. When unthrottling we
1212 * reset the period and enable the interrupt reporting.
1213 */
1214static const struct arm_pmu armv6mpcore_pmu = {
1215	.id			= ARM_PERF_PMU_ID_V6MP,
1216	.handle_irq		= armv6pmu_handle_irq,
1217	.enable			= armv6pmu_enable_event,
1218	.disable		= armv6mpcore_pmu_disable_event,
1219	.event_map		= armv6mpcore_pmu_event_map,
1220	.raw_event		= armv6pmu_raw_event,
1221	.read_counter		= armv6pmu_read_counter,
1222	.write_counter		= armv6pmu_write_counter,
1223	.get_event_idx		= armv6pmu_get_event_idx,
1224	.start			= armv6pmu_start,
1225	.stop			= armv6pmu_stop,
1226	.num_events		= 3,
1227	.max_period		= (1LLU << 32) - 1,
1228};
1229
1230/*
1231 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1232 *
1233 * Copied from ARMv6 code, with the low level code inspired
1234 *  by the ARMv7 Oprofile code.
1235 *
1236 * Cortex-A8 has up to 4 configurable performance counters and
1237 *  a single cycle counter.
1238 * Cortex-A9 has up to 31 configurable performance counters and
1239 *  a single cycle counter.
1240 *
1241 * All counters can be enabled/disabled and IRQ masked separately. The cycle
1242 *  counter and all 4 performance counters together can be reset separately.
1243 */
1244
1245/* Common ARMv7 event types */
1246enum armv7_perf_types {
1247	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
1248	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
1249	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
1250	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
1251	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
1252	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
1253	ARMV7_PERFCTR_DREAD			= 0x06,
1254	ARMV7_PERFCTR_DWRITE			= 0x07,
1255
1256	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
1257	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
1258	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
1259	/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1260	 * It counts:
1261	 *  - all branch instructions,
1262	 *  - instructions that explicitly write the PC,
1263	 *  - exception generating instructions.
1264	 */
1265	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
1266	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
1267	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
1268	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
1269	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,
1270
1271	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,
1272
1273	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
1274};
1275
1276/* ARMv7 Cortex-A8 specific event types */
1277enum armv7_a8_perf_types {
1278	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,
1279
1280	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,
1281
1282	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
1283	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
1284	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
1285	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
1286	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
1287	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
1288	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
1289	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
1290	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
1291	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
1292	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
1293	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
1294	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
1295	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
1296	ARMV7_PERFCTR_L2_NEON			= 0x4E,
1297	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
1298	ARMV7_PERFCTR_L1_INST			= 0x50,
1299	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
1300	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
1301	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
1302	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
1303	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
1304	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
1305	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
1306	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
1307	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
1308	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,
1309
1310	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
1311	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
1312	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
1313};
1314
1315/* ARMv7 Cortex-A9 specific event types */
1316enum armv7_a9_perf_types {
1317	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC	= 0x40,
1318	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC	= 0x41,
1319	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC	= 0x42,
1320
1321	ARMV7_PERFCTR_COHERENT_LINE_MISS	= 0x50,
1322	ARMV7_PERFCTR_COHERENT_LINE_HIT		= 0x51,
1323
1324	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES	= 0x60,
1325	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES	= 0x61,
1326	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES	= 0x62,
1327	ARMV7_PERFCTR_STREX_EXECUTED_PASSED	= 0x63,
1328	ARMV7_PERFCTR_STREX_EXECUTED_FAILED	= 0x64,
1329	ARMV7_PERFCTR_DATA_EVICTION		= 0x65,
1330	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST	= 0x66,
1331	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY		= 0x67,
1332	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE	= 0x68,
1333
1334	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS	= 0x6E,
1335
1336	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST	= 0x70,
1337	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST	= 0x71,
1338	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST	= 0x72,
1339	ARMV7_PERFCTR_FP_EXECUTED_INST		= 0x73,
1340	ARMV7_PERFCTR_NEON_EXECUTED_INST	= 0x74,
1341
1342	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES	= 0x80,
1343	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES	= 0x81,
1344	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
1345	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
1346	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
1347	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES 	= 0x85,
1348	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES	= 0x86,
1349
1350	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES	= 0x8A,
1351	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES	= 0x8B,
1352
1353	ARMV7_PERFCTR_ISB_INST			= 0x90,
1354	ARMV7_PERFCTR_DSB_INST			= 0x91,
1355	ARMV7_PERFCTR_DMB_INST			= 0x92,
1356	ARMV7_PERFCTR_EXT_INTERRUPTS		= 0x93,
1357
1358	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
1359	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
1360	ARMV7_PERFCTR_PLE_FIFO_FLUSH		= 0xA2,
1361	ARMV7_PERFCTR_PLE_RQST_COMPLETED	= 0xA3,
1362	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW		= 0xA4,
1363	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
1364};
1365
1366/*
1367 * Cortex-A8 HW events mapping
1368 *
1369 * The hardware events that we support. We do support cache operations but
1370 * we have harvard caches and no way to combine instruction and data
1371 * accesses/misses in hardware.
1372 */
1373static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1374	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
1375	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
1376	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
1377	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
1378	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1379	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1380	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
1381};
1382
1383static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1384					  [PERF_COUNT_HW_CACHE_OP_MAX]
1385					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1386	[C(L1D)] = {
1387		/*
1388		 * The performance counters don't differentiate between read
1389		 * and write accesses/misses so this isn't strictly correct,
1390		 * but it's the best we can do. Writes and reads get
1391		 * combined.
1392		 */
1393		[C(OP_READ)] = {
1394			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
1395			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
1396		},
1397		[C(OP_WRITE)] = {
1398			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
1399			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
1400		},
1401		[C(OP_PREFETCH)] = {
1402			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1403			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1404		},
1405	},
1406	[C(L1I)] = {
1407		[C(OP_READ)] = {
1408			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
1409			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
1410		},
1411		[C(OP_WRITE)] = {
1412			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
1413			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
1414		},
1415		[C(OP_PREFETCH)] = {
1416			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1417			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1418		},
1419	},
1420	[C(LL)] = {
1421		[C(OP_READ)] = {
1422			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
1423			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
1424		},
1425		[C(OP_WRITE)] = {
1426			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
1427			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
1428		},
1429		[C(OP_PREFETCH)] = {
1430			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1431			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1432		},
1433	},
1434	[C(DTLB)] = {
1435		/*
1436		 * Only ITLB misses and DTLB refills are supported.
1437		 * If users want the DTLB refill misses, a raw counter
1438		 * must be used.
1439		 */
1440		[C(OP_READ)] = {
1441			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1442			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
1443		},
1444		[C(OP_WRITE)] = {
1445			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1446			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
1447		},
1448		[C(OP_PREFETCH)] = {
1449			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1450			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1451		},
1452	},
1453	[C(ITLB)] = {
1454		[C(OP_READ)] = {
1455			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1456			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
1457		},
1458		[C(OP_WRITE)] = {
1459			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1460			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
1461		},
1462		[C(OP_PREFETCH)] = {
1463			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1464			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1465		},
1466	},
1467	[C(BPU)] = {
1468		[C(OP_READ)] = {
1469			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
1470			[C(RESULT_MISS)]
1471					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1472		},
1473		[C(OP_WRITE)] = {
1474			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
1475			[C(RESULT_MISS)]
1476					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1477		},
1478		[C(OP_PREFETCH)] = {
1479			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1480			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1481		},
1482	},
1483};
1484
1485/*
1486 * Cortex-A9 HW events mapping
1487 */
1488static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1489	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
1490	[PERF_COUNT_HW_INSTRUCTIONS]	    =
1491					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1492	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1493	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1494	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1495	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1496	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
1497};
1498
1499static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1500					  [PERF_COUNT_HW_CACHE_OP_MAX]
1501					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1502	[C(L1D)] = {
1503		/*
1504		 * The performance counters don't differentiate between read
1505		 * and write accesses/misses so this isn't strictly correct,
1506		 * but it's the best we can do. Writes and reads get
1507		 * combined.
1508		 */
1509		[C(OP_READ)] = {
1510			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
1511			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
1512		},
1513		[C(OP_WRITE)] = {
1514			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
1515			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
1516		},
1517		[C(OP_PREFETCH)] = {
1518			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1519			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1520		},
1521	},
1522	[C(L1I)] = {
1523		[C(OP_READ)] = {
1524			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1525			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
1526		},
1527		[C(OP_WRITE)] = {
1528			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1529			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
1530		},
1531		[C(OP_PREFETCH)] = {
1532			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1533			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1534		},
1535	},
1536	[C(LL)] = {
1537		[C(OP_READ)] = {
1538			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1539			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1540		},
1541		[C(OP_WRITE)] = {
1542			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1543			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1544		},
1545		[C(OP_PREFETCH)] = {
1546			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1547			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1548		},
1549	},
1550	[C(DTLB)] = {
1551		/*
1552		 * Only ITLB misses and DTLB refills are supported.
1553		 * If users want the DTLB refill misses, a raw counter
1554		 * must be used.
1555		 */
1556		[C(OP_READ)] = {
1557			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1558			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
1559		},
1560		[C(OP_WRITE)] = {
1561			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1562			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
1563		},
1564		[C(OP_PREFETCH)] = {
1565			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1566			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1567		},
1568	},
1569	[C(ITLB)] = {
1570		[C(OP_READ)] = {
1571			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1572			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
1573		},
1574		[C(OP_WRITE)] = {
1575			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1576			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
1577		},
1578		[C(OP_PREFETCH)] = {
1579			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1580			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1581		},
1582	},
1583	[C(BPU)] = {
1584		[C(OP_READ)] = {
1585			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
1586			[C(RESULT_MISS)]
1587					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1588		},
1589		[C(OP_WRITE)] = {
1590			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
1591			[C(RESULT_MISS)]
1592					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1593		},
1594		[C(OP_PREFETCH)] = {
1595			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
1596			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
1597		},
1598	},
1599};
1600
1601/*
1602 * Perf Events counters
1603 */
1604enum armv7_counters {
1605	ARMV7_CYCLE_COUNTER 		= 1,	/* Cycle counter */
1606	ARMV7_COUNTER0			= 2,	/* First event counter */
1607};
1608
1609/*
1610 * The cycle counter is ARMV7_CYCLE_COUNTER.
1611 * The first event counter is ARMV7_COUNTER0.
1612 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1613 */
1614#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
1615
1616/*
1617 * ARMv7 low level PMNC access
1618 */
1619
1620/*
1621 * Per-CPU PMNC: config reg
1622 */
1623#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
1624#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
1625#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
1626#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
1627#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
1628#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
1629#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
1630#define	ARMV7_PMNC_N_MASK	0x1f
1631#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
1632
1633/*
1634 * Available counters
1635 */
1636#define ARMV7_CNT0 		0	/* First event counter */
1637#define ARMV7_CCNT 		31	/* Cycle counter */
1638
1639/* Perf Event to low level counters mapping */
1640#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
1641
1642/*
1643 * CNTENS: counters enable reg
1644 */
1645#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1646#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
1647
1648/*
1649 * CNTENC: counters disable reg
1650 */
1651#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1652#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
1653
1654/*
1655 * INTENS: counters overflow interrupt enable reg
1656 */
1657#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1658#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
1659
1660/*
1661 * INTENC: counters overflow interrupt disable reg
1662 */
1663#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1664#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
1665
1666/*
1667 * EVTSEL: Event selection reg
1668 */
1669#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
1670
1671/*
1672 * SELECT: Counter selection reg
1673 */
1674#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
1675
1676/*
1677 * FLAG: counters overflow flag status reg
1678 */
1679#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1680#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
1681#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
1682#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
1683
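/*
 * Low-level accessors for the ARMv7 PMU registers (PMNC, counter and
 * interrupt enables, overflow flags), all accessed through coprocessor 15,
 * register c9.
 */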
1684static inline unsigned long armv7_pmnc_read(void)
1685{
1686	u32 val;
1687	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1688	return val;
1689}
1690
1691static inline void armv7_pmnc_write(unsigned long val)
1692{
1693	val &= ARMV7_PMNC_MASK;
1694	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1695}
1696
1697static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1698{
1699	return pmnc & ARMV7_OVERFLOWED_MASK;
1700}
1701
1702static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1703					enum armv7_counters counter)
1704{
1705	int ret;
1706
1707	if (counter == ARMV7_CYCLE_COUNTER)
1708		ret = pmnc & ARMV7_FLAG_C;
1709	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1710		ret = pmnc & ARMV7_FLAG_P(counter);
1711	else
1712		pr_err("CPU%u checking wrong counter %d overflow status\n",
1713			smp_processor_id(), counter);
1714
1715	return ret;
1716}
1717
1718static inline int armv7_pmnc_select_counter(unsigned int idx)
1719{
1720	u32 val;
1721
1722	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1723		pr_err("CPU%u selecting wrong PMNC counter"
1724			" %d\n", smp_processor_id(), idx);
1725		return -1;
1726	}
1727
1728	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1729	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1730
1731	return idx;
1732}
1733
1734static inline u32 armv7pmu_read_counter(int idx)
1735{
1736	unsigned long value = 0;
1737
1738	if (idx == ARMV7_CYCLE_COUNTER)
1739		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1740	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1741		if (armv7_pmnc_select_counter(idx) == idx)
1742			asm volatile("mrc p15, 0, %0, c9, c13, 2"
1743				     : "=r" (value));
1744	} else
1745		pr_err("CPU%u reading wrong counter %d\n",
1746			smp_processor_id(), idx);
1747
1748	return value;
1749}
1750
1751static inline void armv7pmu_write_counter(int idx, u32 value)
1752{
1753	if (idx == ARMV7_CYCLE_COUNTER)
1754		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1755	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1756		if (armv7_pmnc_select_counter(idx) == idx)
1757			asm volatile("mcr p15, 0, %0, c9, c13, 2"
1758				     : : "r" (value));
1759	} else
1760		pr_err("CPU%u writing wrong counter %d\n",
1761			smp_processor_id(), idx);
1762}
1763
1764static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1765{
1766	if (armv7_pmnc_select_counter(idx) == idx) {
1767		val &= ARMV7_EVTSEL_MASK;
1768		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1769	}
1770}
1771
1772static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1773{
1774	u32 val;
1775
1776	if ((idx != ARMV7_CYCLE_COUNTER) &&
1777	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1778		pr_err("CPU%u enabling wrong PMNC counter"
1779			" %d\n", smp_processor_id(), idx);
1780		return -1;
1781	}
1782
1783	if (idx == ARMV7_CYCLE_COUNTER)
1784		val = ARMV7_CNTENS_C;
1785	else
1786		val = ARMV7_CNTENS_P(idx);
1787
1788	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1789
1790	return idx;
1791}
1792
1793static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1794{
1795	u32 val;
1796
1797
1798	if ((idx != ARMV7_CYCLE_COUNTER) &&
1799	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1800		pr_err("CPU%u disabling wrong PMNC counter"
1801			" %d\n", smp_processor_id(), idx);
1802		return -1;
1803	}
1804
1805	if (idx == ARMV7_CYCLE_COUNTER)
1806		val = ARMV7_CNTENC_C;
1807	else
1808		val = ARMV7_CNTENC_P(idx);
1809
1810	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1811
1812	return idx;
1813}
1814
1815static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1816{
1817	u32 val;
1818
1819	if ((idx != ARMV7_CYCLE_COUNTER) &&
1820	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1821		pr_err("CPU%u enabling wrong PMNC counter"
1822			" interrupt enable %d\n", smp_processor_id(), idx);
1823		return -1;
1824	}
1825
1826	if (idx == ARMV7_CYCLE_COUNTER)
1827		val = ARMV7_INTENS_C;
1828	else
1829		val = ARMV7_INTENS_P(idx);
1830
1831	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1832
1833	return idx;
1834}
1835
1836static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1837{
1838	u32 val;
1839
1840	if ((idx != ARMV7_CYCLE_COUNTER) &&
1841	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1842		pr_err("CPU%u disabling wrong PMNC counter"
1843			" interrupt enable %d\n", smp_processor_id(), idx);
1844		return -1;
1845	}
1846
1847	if (idx == ARMV7_CYCLE_COUNTER)
1848		val = ARMV7_INTENC_C;
1849	else
1850		val = ARMV7_INTENC_P(idx);
1851
1852	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1853
1854	return idx;
1855}
1856
1857static inline u32 armv7_pmnc_getreset_flags(void)
1858{
1859	u32 val;
1860
1861	/* Read */
1862	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1863
1864	/* Write to clear flags */
1865	val &= ARMV7_FLAG_MASK;
1866	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1867
1868	return val;
1869}
1870
1871#ifdef DEBUG
1872static void armv7_pmnc_dump_regs(void)
1873{
1874	u32 val;
1875	unsigned int cnt;
1876
1877	printk(KERN_INFO "PMNC registers dump:\n");
1878
1879	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1880	printk(KERN_INFO "PMNC  =0x%08x\n", val);
1881
1882	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1883	printk(KERN_INFO "CNTENS=0x%08x\n", val);
1884
1885	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1886	printk(KERN_INFO "INTENS=0x%08x\n", val);
1887
1888	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1889	printk(KERN_INFO "FLAGS =0x%08x\n", val);
1890
1891	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1892	printk(KERN_INFO "SELECT=0x%08x\n", val);
1893
1894	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1895	printk(KERN_INFO "CCNT  =0x%08x\n", val);
1896
1897	for (cnt = ARMV7_COUNTER0; cnt <= ARMV7_COUNTER_LAST; cnt++) {
1898		armv7_pmnc_select_counter(cnt);
1899		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1900		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1901			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1902		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1903		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1904			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1905	}
1906}
1907#endif
1908
1909	static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1910{
1911	unsigned long flags;
1912
1913	/*
1914	 * Enable counter and interrupt, and set the counter to count
1915	 * the event that we're interested in.
1916	 */
1917	spin_lock_irqsave(&pmu_lock, flags);
1918
1919	/*
1920	 * Disable counter
1921	 */
1922	armv7_pmnc_disable_counter(idx);
1923
1924	/*
1925	 * Set event (if destined for PMNx counters)
1926	 * We don't need to set the event if it's a cycle count
1927	 */
1928	if (idx != ARMV7_CYCLE_COUNTER)
1929		armv7_pmnc_write_evtsel(idx, hwc->config_base);
1930
1931	/*
1932	 * Enable interrupt for this counter
1933	 */
1934	armv7_pmnc_enable_intens(idx);
1935
1936	/*
1937	 * Enable counter
1938	 */
1939	armv7_pmnc_enable_counter(idx);
1940
1941	spin_unlock_irqrestore(&pmu_lock, flags);
1942}
1943
1944static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1945{
1946	unsigned long flags;
1947
1948	/*
1949	 * Disable counter and interrupt
1950	 */
1951	spin_lock_irqsave(&pmu_lock, flags);
1952
1953	/*
1954	 * Disable counter
1955	 */
1956	armv7_pmnc_disable_counter(idx);
1957
1958	/*
1959	 * Disable interrupt for this counter
1960	 */
1961	armv7_pmnc_disable_intens(idx);
1962
1963	spin_unlock_irqrestore(&pmu_lock, flags);
1964}
1965
1966static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
1967{
1968	unsigned long pmnc;
1969	struct perf_sample_data data;
1970	struct cpu_hw_events *cpuc;
1971	struct pt_regs *regs;
1972	int idx;
1973
1974	/*
1975	 * Get and reset the IRQ flags
1976	 */
1977	pmnc = armv7_pmnc_getreset_flags();
1978
1979	/*
1980	 * Did an overflow occur?
1981	 */
1982	if (!armv7_pmnc_has_overflowed(pmnc))
1983		return IRQ_NONE;
1984
1985	/*
1986	 * Handle the counter(s) overflow(s)
1987	 */
1988	regs = get_irq_regs();
1989
1990	perf_sample_data_init(&data, 0);
1991
1992	cpuc = &__get_cpu_var(cpu_hw_events);
1993	for (idx = 0; idx <= armpmu->num_events; ++idx) {
1994		struct perf_event *event = cpuc->events[idx];
1995		struct hw_perf_event *hwc;
1996
1997		if (!test_bit(idx, cpuc->active_mask))
1998			continue;
1999
2000		/*
2001		 * We have a single interrupt for all counters. Check that
2002		 * each counter has overflowed before we process it.
2003		 */
2004		if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2005			continue;
2006
2007		hwc = &event->hw;
2008		armpmu_event_update(event, hwc, idx);
2009		data.period = event->hw.last_period;
2010		if (!armpmu_event_set_period(event, hwc, idx))
2011			continue;
2012
2013		if (perf_event_overflow(event, 0, &data, regs))
2014			armpmu->disable(hwc, idx);
2015	}
2016
2017	/*
2018	 * Handle the pending perf events.
2019	 *
2020	 * Note: this call *must* be run with interrupts disabled. For
2021	 * platforms that can have the PMU interrupts raised as an NMI, this
2022	 * will not work.
2023	 */
2024	perf_event_do_pending();
2025
2026	return IRQ_HANDLED;
2027}
2028
2029static void armv7pmu_start(void)
2030{
2031	unsigned long flags;
2032
2033	spin_lock_irqsave(&pmu_lock, flags);
2034	/* Enable all counters */
2035	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2036	spin_unlock_irqrestore(&pmu_lock, flags);
2037}
2038
2039static void armv7pmu_stop(void)
2040{
2041	unsigned long flags;
2042
2043	spin_lock_irqsave(&pmu_lock, flags);
2044	/* Disable all counters */
2045	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2046	spin_unlock_irqrestore(&pmu_lock, flags);
2047}
2048
2049static inline int armv7_a8_pmu_event_map(int config)
2050{
2051	int mapping = armv7_a8_perf_map[config];
2052	if (HW_OP_UNSUPPORTED == mapping)
2053		mapping = -EOPNOTSUPP;
2054	return mapping;
2055}
2056
2057static inline int armv7_a9_pmu_event_map(int config)
2058{
2059	int mapping = armv7_a9_perf_map[config];
2060	if (HW_OP_UNSUPPORTED == mapping)
2061		mapping = -EOPNOTSUPP;
2062	return mapping;
2063}
2064
2065static u64 armv7pmu_raw_event(u64 config)
2066{
2067	return config & 0xff;
2068}
2069
2070static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2071				  struct hw_perf_event *event)
2072{
2073	int idx;
2074
2075	/* Always place a cycle count event into the cycle counter. */
2076	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2077		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2078			return -EAGAIN;
2079
2080		return ARMV7_CYCLE_COUNTER;
2081	} else {
2082		/*
2083		 * For anything other than a cycle counter, try to use
2084		 * one of the event counters.
2085		 */
2086		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2087			if (!test_and_set_bit(idx, cpuc->used_mask))
2088				return idx;
2089		}
2090
2091		/* The counters are all in use. */
2092		return -EAGAIN;
2093	}
2094}
2095
2096static struct arm_pmu armv7pmu = {
2097	.handle_irq		= armv7pmu_handle_irq,
2098	.enable			= armv7pmu_enable_event,
2099	.disable		= armv7pmu_disable_event,
2100	.raw_event		= armv7pmu_raw_event,
2101	.read_counter		= armv7pmu_read_counter,
2102	.write_counter		= armv7pmu_write_counter,
2103	.get_event_idx		= armv7pmu_get_event_idx,
2104	.start			= armv7pmu_start,
2105	.stop			= armv7pmu_stop,
2106	.max_period		= (1LLU << 32) - 1,
2107};
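/*
 * Note: .id, .event_map and .num_events are deliberately left unset here;
 * they are filled in at runtime by init_hw_perf_events() once the exact
 * Cortex-A8/A9 part has been identified and the PMNC has been probed.
 */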
2108
2109static u32 __init armv7_reset_read_pmnc(void)
2110{
2111	u32 nb_cnt;
2112
2113	/* Initialize & Reset PMNC: C and P bits */
2114	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2115
2116	/* Read the number of CNTx counters supported from PMNC */
2117	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2118
2119	/* Add the CPU cycles counter and return */
2120	return nb_cnt + 1;
2121}
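/*
 * Worked example, based on the counter counts ARM documents for these
 * cores: a Cortex-A8 PMU reports PMNC.N == 4, so this returns 5 (four
 * event counters plus CCNT); a Cortex-A9 reports PMNC.N == 6 and this
 * returns 7.
 */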
2122
2123/*
2124 * ARMv5 [xscale] Performance counter handling code.
2125 *
2126 * Based on xscale OProfile code.
2127 *
2128 * There are two variants of the xscale PMU that we support:
2129 * 	- xscale1pmu: 2 event counters and a cycle counter
2130 * 	- xscale2pmu: 4 event counters and a cycle counter
2131 * The two variants share event definitions, but have different
2132 * PMU structures.
2133 */
2134
2135enum xscale_perf_types {
2136	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
2137	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
2138	XSCALE_PERFCTR_DATA_STALL		= 0x02,
2139	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
2140	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
2141	XSCALE_PERFCTR_BRANCH			= 0x05,
2142	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
2143	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
2144	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
2145	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
2146	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
2147	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
2148	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
2149	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
2150	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
2151	XSCALE_PERFCTR_BCU_FULL			= 0x11,
2152	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
2153	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
2154	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
2155	XSCALE_PERFCTR_RMW			= 0x16,
2156	/* XSCALE_PERFCTR_CCNT is not hardware defined */
2157	XSCALE_PERFCTR_CCNT			= 0xFE,
2158	XSCALE_PERFCTR_UNUSED			= 0xFF,
2159};
2160
2161enum xscale_counters {
2162	XSCALE_CYCLE_COUNTER	= 1,
2163	XSCALE_COUNTER0,
2164	XSCALE_COUNTER1,
2165	XSCALE_COUNTER2,
2166	XSCALE_COUNTER3,
2167};
2168
2169static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2170	[PERF_COUNT_HW_CPU_CYCLES]	    = XSCALE_PERFCTR_CCNT,
2171	[PERF_COUNT_HW_INSTRUCTIONS]	    = XSCALE_PERFCTR_INSTRUCTION,
2172	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
2173	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
2174	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2175	[PERF_COUNT_HW_BRANCH_MISSES]	    = XSCALE_PERFCTR_BRANCH_MISS,
2176	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
2177};
2178
2179static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2180					   [PERF_COUNT_HW_CACHE_OP_MAX]
2181					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2182	[C(L1D)] = {
2183		[C(OP_READ)] = {
2184			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
2185			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
2186		},
2187		[C(OP_WRITE)] = {
2188			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
2189			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
2190		},
2191		[C(OP_PREFETCH)] = {
2192			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2193			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2194		},
2195	},
2196	[C(L1I)] = {
2197		[C(OP_READ)] = {
2198			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2199			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
2200		},
2201		[C(OP_WRITE)] = {
2202			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2203			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
2204		},
2205		[C(OP_PREFETCH)] = {
2206			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2207			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2208		},
2209	},
2210	[C(LL)] = {
2211		[C(OP_READ)] = {
2212			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2213			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2214		},
2215		[C(OP_WRITE)] = {
2216			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2217			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2218		},
2219		[C(OP_PREFETCH)] = {
2220			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2221			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2222		},
2223	},
2224	[C(DTLB)] = {
2225		[C(OP_READ)] = {
2226			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2227			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
2228		},
2229		[C(OP_WRITE)] = {
2230			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2231			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
2232		},
2233		[C(OP_PREFETCH)] = {
2234			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2235			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2236		},
2237	},
2238	[C(ITLB)] = {
2239		[C(OP_READ)] = {
2240			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2241			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
2242		},
2243		[C(OP_WRITE)] = {
2244			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2245			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
2246		},
2247		[C(OP_PREFETCH)] = {
2248			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2249			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2250		},
2251	},
2252	[C(BPU)] = {
2253		[C(OP_READ)] = {
2254			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2255			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2256		},
2257		[C(OP_WRITE)] = {
2258			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2259			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2260		},
2261		[C(OP_PREFETCH)] = {
2262			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
2263			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
2264		},
2265	},
2266};
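/*
 * Summary of the map above: only the L1 caches and the TLBs have
 * dedicated xscale events; the LL and BPU slots are left unsupported
 * (branch counts are exposed through xscale_perf_map instead).
 */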
2267
2268	#define XSCALE_PMU_ENABLE	0x001
2269	#define XSCALE_PMN_RESET	0x002
2270	#define XSCALE_CCNT_RESET	0x004
2271	#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2272	#define XSCALE_PMU_CNT64	0x008
2273
2274static inline int
2275xscalepmu_event_map(int config)
2276{
2277	int mapping = xscale_perf_map[config];
2278	if (HW_OP_UNSUPPORTED == mapping)
2279		mapping = -EOPNOTSUPP;
2280	return mapping;
2281}
2282
2283static u64
2284xscalepmu_raw_event(u64 config)
2285{
2286	return config & 0xff;
2287}
2288
2289#define XSCALE1_OVERFLOWED_MASK	0x700
2290#define XSCALE1_CCOUNT_OVERFLOW	0x400
2291#define XSCALE1_COUNT0_OVERFLOW	0x100
2292#define XSCALE1_COUNT1_OVERFLOW	0x200
2293#define XSCALE1_CCOUNT_INT_EN	0x040
2294#define XSCALE1_COUNT0_INT_EN	0x010
2295#define XSCALE1_COUNT1_INT_EN	0x020
2296#define XSCALE1_COUNT0_EVT_SHFT	12
2297#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
2298#define XSCALE1_COUNT1_EVT_SHFT	20
2299#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
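/*
 * xscale1 PMNC layout implied by the masks above and the write mask in
 * xscale1pmu_write_pmnc() below:
 *   [27:20] counter 1 event select   [19:12] counter 0 event select
 *   [10:8]  overflow flags (CCNT, CNT1, CNT0)
 *   [6:4]   interrupt enables (CCNT, CNT1, CNT0)
 *   [3:0]   CNT64, CCNT reset, PMN reset, enable
 *   bits 7, 11 and [31:28] are treated as write-as-0
 */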
2300
2301static inline u32
2302xscale1pmu_read_pmnc(void)
2303{
2304	u32 val;
2305	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2306	return val;
2307}
2308
2309static inline void
2310xscale1pmu_write_pmnc(u32 val)
2311{
2312	/* upper 4 bits and bits 7 and 11 are write-as-0 */
2313	val &= 0xffff77f;
2314	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2315}
2316
2317static inline int
2318xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2319					enum xscale_counters counter)
2320{
2321	int ret = 0;
2322
2323	switch (counter) {
2324	case XSCALE_CYCLE_COUNTER:
2325		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2326		break;
2327	case XSCALE_COUNTER0:
2328		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2329		break;
2330	case XSCALE_COUNTER1:
2331		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2332		break;
2333	default:
2334		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2335	}
2336
2337	return ret;
2338}
2339
2340static irqreturn_t
2341xscale1pmu_handle_irq(int irq_num, void *dev)
2342{
2343	unsigned long pmnc;
2344	struct perf_sample_data data;
2345	struct cpu_hw_events *cpuc;
2346	struct pt_regs *regs;
2347	int idx;
2348
2349	pmnc = xscale1pmu_read_pmnc();
2350
2351	/*
2352	 * Write the value back to clear the overflow flags. Overflow
2353	 * flags remain in pmnc for use below. We also disable the PMU
2354	 * while we process the interrupt.
2355	 */
2356	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2357
2358	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2359		return IRQ_NONE;
2360
2361	regs = get_irq_regs();
2362
2363	perf_sample_data_init(&data, 0);
2364
2365	cpuc = &__get_cpu_var(cpu_hw_events);
2366	for (idx = 0; idx <= armpmu->num_events; ++idx) {
2367		struct perf_event *event = cpuc->events[idx];
2368		struct hw_perf_event *hwc;
2369
2370		if (!test_bit(idx, cpuc->active_mask))
2371			continue;
2372
2373		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2374			continue;
2375
2376		hwc = &event->hw;
2377		armpmu_event_update(event, hwc, idx);
2378		data.period = event->hw.last_period;
2379		if (!armpmu_event_set_period(event, hwc, idx))
2380			continue;
2381
2382		if (perf_event_overflow(event, 0, &data, regs))
2383			armpmu->disable(hwc, idx);
2384	}
2385
2386	perf_event_do_pending();
2387
2388	/*
2389	 * Re-enable the PMU.
2390	 */
2391	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2392	xscale1pmu_write_pmnc(pmnc);
2393
2394	return IRQ_HANDLED;
2395}
2396
2397static void
2398xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2399{
2400	unsigned long val, mask, evt, flags;
2401
2402	switch (idx) {
2403	case XSCALE_CYCLE_COUNTER:
2404		mask = 0;
2405		evt = XSCALE1_CCOUNT_INT_EN;
2406		break;
2407	case XSCALE_COUNTER0:
2408		mask = XSCALE1_COUNT0_EVT_MASK;
2409		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2410			XSCALE1_COUNT0_INT_EN;
2411		break;
2412	case XSCALE_COUNTER1:
2413		mask = XSCALE1_COUNT1_EVT_MASK;
2414		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2415			XSCALE1_COUNT1_INT_EN;
2416		break;
2417	default:
2418		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2419		return;
2420	}
2421
2422	spin_lock_irqsave(&pmu_lock, flags);
2423	val = xscale1pmu_read_pmnc();
2424	val &= ~mask;
2425	val |= evt;
2426	xscale1pmu_write_pmnc(val);
2427	spin_unlock_irqrestore(&pmu_lock, flags);
2428}
2429
2430static void
2431xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2432{
2433	unsigned long val, mask, evt, flags;
2434
2435	switch (idx) {
2436	case XSCALE_CYCLE_COUNTER:
2437		mask = XSCALE1_CCOUNT_INT_EN;
2438		evt = 0;
2439		break;
2440	case XSCALE_COUNTER0:
2441		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2442		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2443		break;
2444	case XSCALE_COUNTER1:
2445		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2446		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2447		break;
2448	default:
2449		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2450		return;
2451	}
2452
2453	spin_lock_irqsave(&pmu_lock, flags);
2454	val = xscale1pmu_read_pmnc();
2455	val &= ~mask;
2456	val |= evt;
2457	xscale1pmu_write_pmnc(val);
2458	spin_unlock_irqrestore(&pmu_lock, flags);
2459}
2460
2461static int
2462xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2463			struct hw_perf_event *event)
2464{
2465	if (XSCALE_PERFCTR_CCNT == event->config_base) {
2466		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2467			return -EAGAIN;
2468
2469		return XSCALE_CYCLE_COUNTER;
2470	} else {
2471		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2472			return XSCALE_COUNTER1;
2473		}
2474
2475		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2476			return XSCALE_COUNTER0;
2477		}
2478
2479		return -EAGAIN;
2480	}
2481}
2482
2483static void
2484xscale1pmu_start(void)
2485{
2486	unsigned long flags, val;
2487
2488	spin_lock_irqsave(&pmu_lock, flags);
2489	val = xscale1pmu_read_pmnc();
2490	val |= XSCALE_PMU_ENABLE;
2491	xscale1pmu_write_pmnc(val);
2492	spin_unlock_irqrestore(&pmu_lock, flags);
2493}
2494
2495static void
2496xscale1pmu_stop(void)
2497{
2498	unsigned long flags, val;
2499
2500	spin_lock_irqsave(&pmu_lock, flags);
2501	val = xscale1pmu_read_pmnc();
2502	val &= ~XSCALE_PMU_ENABLE;
2503	xscale1pmu_write_pmnc(val);
2504	spin_unlock_irqrestore(&pmu_lock, flags);
2505}
2506
2507static inline u32
2508xscale1pmu_read_counter(int counter)
2509{
2510	u32 val = 0;
2511
2512	switch (counter) {
2513	case XSCALE_CYCLE_COUNTER:
2514		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2515		break;
2516	case XSCALE_COUNTER0:
2517		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2518		break;
2519	case XSCALE_COUNTER1:
2520		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2521		break;
2522	}
2523
2524	return val;
2525}
2526
2527static inline void
2528xscale1pmu_write_counter(int counter, u32 val)
2529{
2530	switch (counter) {
2531	case XSCALE_CYCLE_COUNTER:
2532		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2533		break;
2534	case XSCALE_COUNTER0:
2535		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2536		break;
2537	case XSCALE_COUNTER1:
2538		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2539		break;
2540	}
2541}
2542
2543static const struct arm_pmu xscale1pmu = {
2544	.id		= ARM_PERF_PMU_ID_XSCALE1,
2545	.handle_irq	= xscale1pmu_handle_irq,
2546	.enable		= xscale1pmu_enable_event,
2547	.disable	= xscale1pmu_disable_event,
2548	.event_map	= xscalepmu_event_map,
2549	.raw_event	= xscalepmu_raw_event,
2550	.read_counter	= xscale1pmu_read_counter,
2551	.write_counter	= xscale1pmu_write_counter,
2552	.get_event_idx	= xscale1pmu_get_event_idx,
2553	.start		= xscale1pmu_start,
2554	.stop		= xscale1pmu_stop,
2555	.num_events	= 3,
2556	.max_period	= (1LLU << 32) - 1,
2557};
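/*
 * num_events == 3 covers the cycle counter plus the two event counters
 * (XSCALE_CYCLE_COUNTER and XSCALE_COUNTER0/1 in enum xscale_counters).
 */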
2558
2559#define XSCALE2_OVERFLOWED_MASK	0x01f
2560#define XSCALE2_CCOUNT_OVERFLOW	0x001
2561#define XSCALE2_COUNT0_OVERFLOW	0x002
2562#define XSCALE2_COUNT1_OVERFLOW	0x004
2563#define XSCALE2_COUNT2_OVERFLOW	0x008
2564#define XSCALE2_COUNT3_OVERFLOW	0x010
2565#define XSCALE2_CCOUNT_INT_EN	0x001
2566#define XSCALE2_COUNT0_INT_EN	0x002
2567#define XSCALE2_COUNT1_INT_EN	0x004
2568#define XSCALE2_COUNT2_INT_EN	0x008
2569#define XSCALE2_COUNT3_INT_EN	0x010
2570#define XSCALE2_COUNT0_EVT_SHFT	0
2571#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
2572#define XSCALE2_COUNT1_EVT_SHFT	8
2573#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
2574#define XSCALE2_COUNT2_EVT_SHFT	16
2575#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
2576#define XSCALE2_COUNT3_EVT_SHFT	24
2577#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
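/*
 * Unlike xscale1, which packs everything into a single PMNC, the xscale2
 * PMU splits its state across separate registers: an overflow flag
 * register, an interrupt enable register and an event select register,
 * each accessed by the CP14 helpers below.
 */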
2578
2579static inline u32
2580xscale2pmu_read_pmnc(void)
2581{
2582	u32 val;
2583	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2584	/* bits 1-2 and 4-23 are read-unpredictable */
2585	return val & 0xff000009;
2586}
2587
2588static inline void
2589xscale2pmu_write_pmnc(u32 val)
2590{
2591	/* bits 4-23 are write-as-0, 24-31 are write ignored */
2592	val &= 0xf;
2593	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2594}
2595
2596static inline u32
2597xscale2pmu_read_overflow_flags(void)
2598{
2599	u32 val;
2600	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2601	return val;
2602}
2603
2604static inline void
2605xscale2pmu_write_overflow_flags(u32 val)
2606{
2607	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2608}
2609
2610static inline u32
2611xscale2pmu_read_event_select(void)
2612{
2613	u32 val;
2614	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2615	return val;
2616}
2617
2618static inline void
2619xscale2pmu_write_event_select(u32 val)
2620{
2621	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2622}
2623
2624static inline u32
2625xscale2pmu_read_int_enable(void)
2626{
2627	u32 val;
2628	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2629	return val;
2630}
2631
2632static void
2633xscale2pmu_write_int_enable(u32 val)
2634{
2635	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2636}
2637
2638static inline int
2639xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2640					enum xscale_counters counter)
2641{
2642	int ret = 0;
2643
2644	switch (counter) {
2645	case XSCALE_CYCLE_COUNTER:
2646		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2647		break;
2648	case XSCALE_COUNTER0:
2649		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2650		break;
2651	case XSCALE_COUNTER1:
2652		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2653		break;
2654	case XSCALE_COUNTER2:
2655		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2656		break;
2657	case XSCALE_COUNTER3:
2658		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2659		break;
2660	default:
2661		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2662	}
2663
2664	return ret;
2665}
2666
2667static irqreturn_t
2668xscale2pmu_handle_irq(int irq_num, void *dev)
2669{
2670	unsigned long pmnc, of_flags;
2671	struct perf_sample_data data;
2672	struct cpu_hw_events *cpuc;
2673	struct pt_regs *regs;
2674	int idx;
2675
2676	/* Disable the PMU. */
2677	pmnc = xscale2pmu_read_pmnc();
2678	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2679
2680	/* Check the overflow flag register. */
2681	of_flags = xscale2pmu_read_overflow_flags();
2682	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2683		return IRQ_NONE;
2684
2685	/* Clear the overflow bits. */
2686	xscale2pmu_write_overflow_flags(of_flags);
2687
2688	regs = get_irq_regs();
2689
2690	perf_sample_data_init(&data, 0);
2691
2692	cpuc = &__get_cpu_var(cpu_hw_events);
2693	for (idx = 0; idx <= armpmu->num_events; ++idx) {
2694		struct perf_event *event = cpuc->events[idx];
2695		struct hw_perf_event *hwc;
2696
2697		if (!test_bit(idx, cpuc->active_mask))
2698			continue;
2699
2700		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2701			continue;
2702
2703		hwc = &event->hw;
2704		armpmu_event_update(event, hwc, idx);
2705		data.period = event->hw.last_period;
2706		if (!armpmu_event_set_period(event, hwc, idx))
2707			continue;
2708
2709		if (perf_event_overflow(event, 0, &data, regs))
2710			armpmu->disable(hwc, idx);
2711	}
2712
2713	perf_event_do_pending();
2714
2715	/*
2716	 * Re-enable the PMU.
2717	 */
2718	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2719	xscale2pmu_write_pmnc(pmnc);
2720
2721	return IRQ_HANDLED;
2722}
2723
2724static void
2725xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2726{
2727	unsigned long flags, ien, evtsel;
2728
2729	ien = xscale2pmu_read_int_enable();
2730	evtsel = xscale2pmu_read_event_select();
2731
2732	switch (idx) {
2733	case XSCALE_CYCLE_COUNTER:
2734		ien |= XSCALE2_CCOUNT_INT_EN;
2735		break;
2736	case XSCALE_COUNTER0:
2737		ien |= XSCALE2_COUNT0_INT_EN;
2738		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2739		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2740		break;
2741	case XSCALE_COUNTER1:
2742		ien |= XSCALE2_COUNT1_INT_EN;
2743		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2744		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2745		break;
2746	case XSCALE_COUNTER2:
2747		ien |= XSCALE2_COUNT2_INT_EN;
2748		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2749		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2750		break;
2751	case XSCALE_COUNTER3:
2752		ien |= XSCALE2_COUNT3_INT_EN;
2753		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2754		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2755		break;
2756	default:
2757		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2758		return;
2759	}
2760
2761	spin_lock_irqsave(&pmu_lock, flags);
2762	xscale2pmu_write_event_select(evtsel);
2763	xscale2pmu_write_int_enable(ien);
2764	spin_unlock_irqrestore(&pmu_lock, flags);
2765}
2766
2767static void
2768xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2769{
2770	unsigned long flags, ien, evtsel;
2771
2772	ien = xscale2pmu_read_int_enable();
2773	evtsel = xscale2pmu_read_event_select();
2774
2775	switch (idx) {
2776	case XSCALE_CYCLE_COUNTER:
2777		ien &= ~XSCALE2_CCOUNT_INT_EN;
2778		break;
2779	case XSCALE_COUNTER0:
2780		ien &= ~XSCALE2_COUNT0_INT_EN;
2781		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2782		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2783		break;
2784	case XSCALE_COUNTER1:
2785		ien &= ~XSCALE2_COUNT1_INT_EN;
2786		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2787		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2788		break;
2789	case XSCALE_COUNTER2:
2790		ien &= ~XSCALE2_COUNT2_INT_EN;
2791		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2792		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2793		break;
2794	case XSCALE_COUNTER3:
2795		ien &= ~XSCALE2_COUNT3_INT_EN;
2796		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2797		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2798		break;
2799	default:
2800		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2801		return;
2802	}
2803
2804	spin_lock_irqsave(&pmu_lock, flags);
2805	xscale2pmu_write_event_select(evtsel);
2806	xscale2pmu_write_int_enable(ien);
2807	spin_unlock_irqrestore(&pmu_lock, flags);
2808}
2809
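/*
 * xscale2 shares the CCNT/CNT0/CNT1 layout with xscale1, so reuse the
 * xscale1 allocator and only add the two extra counters (CNT3, then CNT2)
 * here.
 */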
2810static int
2811xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2812			struct hw_perf_event *event)
2813{
2814	int idx = xscale1pmu_get_event_idx(cpuc, event);
2815	if (idx >= 0)
2816		goto out;
2817
2818	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2819		idx = XSCALE_COUNTER3;
2820	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2821		idx = XSCALE_COUNTER2;
2822out:
2823	return idx;
2824}
2825
2826static void
2827xscale2pmu_start(void)
2828{
2829	unsigned long flags, val;
2830
2831	spin_lock_irqsave(&pmu_lock, flags);
2832	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2833	val |= XSCALE_PMU_ENABLE;
2834	xscale2pmu_write_pmnc(val);
2835	spin_unlock_irqrestore(&pmu_lock, flags);
2836}
2837
2838static void
2839xscale2pmu_stop(void)
2840{
2841	unsigned long flags, val;
2842
2843	spin_lock_irqsave(&pmu_lock, flags);
2844	val = xscale2pmu_read_pmnc();
2845	val &= ~XSCALE_PMU_ENABLE;
2846	xscale2pmu_write_pmnc(val);
2847	spin_unlock_irqrestore(&pmu_lock, flags);
2848}
2849
2850static inline u32
2851xscale2pmu_read_counter(int counter)
2852{
2853	u32 val = 0;
2854
2855	switch (counter) {
2856	case XSCALE_CYCLE_COUNTER:
2857		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2858		break;
2859	case XSCALE_COUNTER0:
2860		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2861		break;
2862	case XSCALE_COUNTER1:
2863		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2864		break;
2865	case XSCALE_COUNTER2:
2866		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2867		break;
2868	case XSCALE_COUNTER3:
2869		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2870		break;
2871	}
2872
2873	return val;
2874}
2875
2876static inline void
2877xscale2pmu_write_counter(int counter, u32 val)
2878{
2879	switch (counter) {
2880	case XSCALE_CYCLE_COUNTER:
2881		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2882		break;
2883	case XSCALE_COUNTER0:
2884		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2885		break;
2886	case XSCALE_COUNTER1:
2887		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2888		break;
2889	case XSCALE_COUNTER2:
2890		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2891		break;
2892	case XSCALE_COUNTER3:
2893		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2894		break;
2895	}
2896}
2897
2898static const struct arm_pmu xscale2pmu = {
2899	.id		= ARM_PERF_PMU_ID_XSCALE2,
2900	.handle_irq	= xscale2pmu_handle_irq,
2901	.enable		= xscale2pmu_enable_event,
2902	.disable	= xscale2pmu_disable_event,
2903	.event_map	= xscalepmu_event_map,
2904	.raw_event	= xscalepmu_raw_event,
2905	.read_counter	= xscale2pmu_read_counter,
2906	.write_counter	= xscale2pmu_write_counter,
2907	.get_event_idx	= xscale2pmu_get_event_idx,
2908	.start		= xscale2pmu_start,
2909	.stop		= xscale2pmu_stop,
2910	.num_events	= 5,
2911	.max_period	= (1LLU << 32) - 1,
2912};
2913
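/*
 * Probe the PMU from the CPUID register: implementor 0x41 is ARM Ltd,
 * with the ARM11 and Cortex-A8/A9 variants picked by part number (e.g. a
 * Cortex-A9 has (cpuid & 0xFFF0) == 0xC090, matching the case label
 * below); implementor 0x69 is Intel, where CPUID bits [15:13] select the
 * xscale PMU generation.
 */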
2914static int __init
2915init_hw_perf_events(void)
2916{
2917	unsigned long cpuid = read_cpuid_id();
2918	unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2919	unsigned long part_number = (cpuid & 0xFFF0);
2920
2921	/* ARM Ltd CPUs. */
2922	if (0x41 == implementor) {
2923		switch (part_number) {
2924		case 0xB360:	/* ARM1136 */
2925		case 0xB560:	/* ARM1156 */
2926		case 0xB760:	/* ARM1176 */
2927			armpmu = &armv6pmu;
2928			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2929					sizeof(armv6_perf_cache_map));
2930			perf_max_events	= armv6pmu.num_events;
2931			break;
2932		case 0xB020:	/* ARM11mpcore */
2933			armpmu = &armv6mpcore_pmu;
2934			memcpy(armpmu_perf_cache_map,
2935			       armv6mpcore_perf_cache_map,
2936			       sizeof(armv6mpcore_perf_cache_map));
2937			perf_max_events = armv6mpcore_pmu.num_events;
2938			break;
2939		case 0xC080:	/* Cortex-A8 */
2940			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2941			memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2942				sizeof(armv7_a8_perf_cache_map));
2943			armv7pmu.event_map = armv7_a8_pmu_event_map;
2944			armpmu = &armv7pmu;
2945
2946			/* Reset PMNC and read the number of CNTx
2947			   counters supported */
2948			armv7pmu.num_events = armv7_reset_read_pmnc();
2949			perf_max_events = armv7pmu.num_events;
2950			break;
2951		case 0xC090:	/* Cortex-A9 */
2952			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
2953			memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
2954				sizeof(armv7_a9_perf_cache_map));
2955			armv7pmu.event_map = armv7_a9_pmu_event_map;
2956			armpmu = &armv7pmu;
2957
2958			/* Reset PMNC and read the number of CNTx
2959			   counters supported */
2960			armv7pmu.num_events = armv7_reset_read_pmnc();
2961			perf_max_events = armv7pmu.num_events;
2962			break;
2963		}
2964	/* Intel CPUs [xscale]. */
2965	} else if (0x69 == implementor) {
2966		part_number = (cpuid >> 13) & 0x7;
2967		switch (part_number) {
2968		case 1:
2969			armpmu = &xscale1pmu;
2970			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2971					sizeof(xscale_perf_cache_map));
2972			perf_max_events	= xscale1pmu.num_events;
2973			break;
2974		case 2:
2975			armpmu = &xscale2pmu;
2976			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2977					sizeof(xscale_perf_cache_map));
2978			perf_max_events	= xscale2pmu.num_events;
2979			break;
2980		}
2981	}
2982
2983	if (armpmu) {
2984		pr_info("enabled with %s PMU driver, %d counters available\n",
2985				arm_pmu_names[armpmu->id], armpmu->num_events);
2986	} else {
2987		pr_info("no hardware support available\n");
2988		perf_max_events = -1;
2989	}
2990
2991	return 0;
2992}
2993arch_initcall(init_hw_perf_events);
2994
2995/*
2996 * Callchain handling code.
2997 */
2998static inline void
2999callchain_store(struct perf_callchain_entry *entry,
3000		u64 ip)
3001{
3002	if (entry->nr < PERF_MAX_STACK_DEPTH)
3003		entry->ip[entry->nr++] = ip;
3004}
3005
3006struct frame_tail {
3007	struct frame_tail   *fp;
3008	unsigned long	    sp;
3009	unsigned long	    lr;
3010} __attribute__((packed));
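/*
 * This mirrors the frame layout assumed by the user backtrace below: the
 * frame pointer register is taken to point just past a saved {fp, sp, lr}
 * triple on the user stack, so stepping back one struct frame_tail lands
 * on the saved registers of the current frame, and each saved fp leads to
 * the caller's triple in the same way.
 */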
3011
3012/*
3013 * Get the return address for a single stackframe and return a pointer to the
3014 * next frame tail.
3015 */
3016static struct frame_tail *
3017user_backtrace(struct frame_tail *tail,
3018	       struct perf_callchain_entry *entry)
3019{
3020	struct frame_tail buftail;
3021
3022	/* Also check accessibility of one struct frame_tail beyond */
3023	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
3024		return NULL;
3025	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3026		return NULL;
3027
3028	callchain_store(entry, buftail.lr);
3029
3030	/*
3031	 * Frame pointers should strictly progress back up the stack
3032	 * (towards higher addresses).
3033	 */
3034	if (tail >= buftail.fp)
3035		return NULL;
3036
3037	return buftail.fp - 1;
3038}
3039
3040static void
3041perf_callchain_user(struct pt_regs *regs,
3042		    struct perf_callchain_entry *entry)
3043{
3044	struct frame_tail *tail;
3045
3046	callchain_store(entry, PERF_CONTEXT_USER);
3047
3048	if (!user_mode(regs))
3049		regs = task_pt_regs(current);
3050
3051	tail = (struct frame_tail *)regs->ARM_fp - 1;
3052
3053	while (tail && !((unsigned long)tail & 0x3))
3054		tail = user_backtrace(tail, entry);
3055}
3056
3057/*
3058 * Gets called by walk_stackframe() for every stackframe. This will be called
3059 * whilst unwinding the stackframe and is like a subroutine return, so we use
3060 * the PC.
3061 */
3062static int
3063callchain_trace(struct stackframe *fr,
3064		void *data)
3065{
3066	struct perf_callchain_entry *entry = data;
3067	callchain_store(entry, fr->pc);
3068	return 0;
3069}
3070
3071static void
3072perf_callchain_kernel(struct pt_regs *regs,
3073		      struct perf_callchain_entry *entry)
3074{
3075	struct stackframe fr;
3076
3077	callchain_store(entry, PERF_CONTEXT_KERNEL);
3078	fr.fp = regs->ARM_fp;
3079	fr.sp = regs->ARM_sp;
3080	fr.lr = regs->ARM_lr;
3081	fr.pc = regs->ARM_pc;
3082	walk_stackframe(&fr, callchain_trace, entry);
3083}
3084
3085static void
3086perf_do_callchain(struct pt_regs *regs,
3087		  struct perf_callchain_entry *entry)
3088{
3089	int is_user;
3090
3091	if (!regs)
3092		return;
3093
3094	is_user = user_mode(regs);
3095
3096	if (!current || !current->pid)
3097		return;
3098
3099	if (is_user && current->state != TASK_RUNNING)
3100		return;
3101
3102	if (!is_user)
3103		perf_callchain_kernel(regs, entry);
3104
3105	if (current->mm)
3106		perf_callchain_user(regs, entry);
3107}
3108
3109static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
3110
3111struct perf_callchain_entry *
3112perf_callchain(struct pt_regs *regs)
3113{
3114	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
3115
3116	entry->nr = 0;
3117	perf_do_callchain(regs, entry);
3118	return entry;
3119}
3120