#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}
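
/*
 * Illustrative userspace sketch (not part of this file, guarded out of the
 * build): the two tables above translate perf's generic hardware and cache
 * events into AMD event-select codes.  Requesting PERF_COUNT_HW_CPU_CYCLES
 * through perf_event_open() is resolved by amd_pmu_event_map() to event
 * code 0x0076 on one of the K7-style counters.  The perf_event_open()
 * wrapper and main() below exist only for this illustration.
 */
#if 0
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	/* resolved by amd_pmu_event_map() to event-select code 0x0076 */
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);	/* this task, any CPU */
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload to be measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}
#endif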

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return 0;
}
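
/*
 * Illustrative sketch (not part of this file, guarded out of the build):
 * for PERF_TYPE_RAW events the user supplies the hardware encoding
 * directly, and amd_pmu_hw_config() above keeps only the bits covered by
 * AMD64_RAW_EVENT_MASK before the counter is programmed.  A raw x86 config
 * places the event-select code in bits 7:0 and the unit mask in bits 15:8;
 * AMD's extended event-select bits live higher up and are not handled by
 * this helper.  raw_amd_event() is a name local to the sketch.
 */
#if 0
#include <linux/perf_event.h>
#include <string.h>

static void raw_amd_event(struct perf_event_attr *attr,
			  unsigned int evsel, unsigned int umask)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_RAW;
	attr->config = ((unsigned long long)umask << 8) | (evsel & 0xff);
}

/* e.g. raw_amd_event(&attr, 0x76, 0x00) requests CPU cycles not halted */
#endif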

/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (nb->owners[i] == event) {
			cmpxchg(nb->owners+i, event, NULL);
			break;
		}
	}
}

 /*
  * AMD64 NorthBridge events need special treatment because
  * counter access needs to be synchronized across all cores
  * of a package. Refer to BKDG section 3.12.
  *
  * NB events are events measuring L3 cache and HyperTransport
  * traffic. They are identified by an event code >= 0xe0.
  * They measure events on the NorthBridge, which is shared
  * by all cores on a package. NB events are counted on a
  * shared set of counters. When an NB event is programmed
  * in a counter, the data actually comes from a shared
  * counter. Thus, access to those counters needs to be
  * synchronized.
  *
  * We implement the synchronization such that no two cores
  * can be measuring NB events using the same counters. Thus,
  * we maintain a per-NB allocation table. The available slot
  * is propagated using the event_constraint structure.
  *
  * We provide only one choice for each NB event based on
  * the fact that only NB events have restrictions. Consequently,
  * if a counter is available, there is a guarantee the NB event
  * will be assigned to it. If no slot is available, an empty
  * constraint is returned and scheduling will eventually fail
  * for this event.
  *
  * Note that all cores attached to the same NB compete for the
  * same counters to host NB events; this is why we use atomic
  * ops. Some multi-chip CPUs may have more than one NB.
  *
  * Given that resources are allocated (cmpxchg), they must be
  * eventually freed for others to use. This is accomplished by
  * calling amd_put_event_constraints().
  *
  * Non-NB events are not impacted by this restriction.
  */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_counters;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}
	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners+i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);
done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}
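
/*
 * Illustrative userspace sketch (not part of this file, guarded out of the
 * build) of the lock-free claim/release pattern used on nb->owners[] above:
 * each slot holds NULL or the owning event, and cmpxchg guarantees that
 * only one core can turn a free slot into an owned one.  The GCC __sync
 * builtins stand in for the kernel's cmpxchg(); NUM_SLOTS, claim_slot() and
 * release_slot() are names local to the sketch.
 */
#if 0
#include <stddef.h>

#define NUM_SLOTS 4

static void *owners[NUM_SLOTS];		/* NULL means the slot is free */

/* Claim the first free slot for 'event'; return its index or -1 if full. */
static int claim_slot(void *event)
{
	int i;

	for (i = 0; i < NUM_SLOTS; i++) {
		/* atomic: only one contender can replace NULL with 'event' */
		if (__sync_bool_compare_and_swap(&owners[i], NULL, event))
			return i;
	}
	return -1;	/* all shared counters busy: empty constraint */
}

/* Release slot i, but only if 'event' still owns it. */
static void release_slot(int i, void *event)
{
	__sync_bool_compare_and_swap(&owners[i], event, NULL);
}
#endif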

static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
	struct amd_nb *nb;
	int i;

	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
	if (!nb)
		return NULL;

	memset(nb, 0, sizeof(*nb));
	nb->nb_id = nb_id;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (boot_cpu_data.x86_max_cores < 2)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct amd_nb *nb;
	int i, nb_id;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	raw_spin_lock(&amd_nb_lock);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			kfree(cpuc->amd_nb);
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;

	raw_spin_unlock(&amd_nb_lock);
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	raw_spin_lock(&amd_nb_lock);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}

	raw_spin_unlock(&amd_nb_lock);
}
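
/*
 * Illustrative sketch (not part of this file, guarded out of the build) of
 * the amd_nb lifecycle driven by the three hotplug callbacks above: every
 * CPU pre-allocates a spare descriptor with an invalid id, adopts an
 * already-published descriptor for its node if one exists (dropping the
 * spare), and the last user, or an unused spare, frees the object.  Plain
 * calloc/free stand in for the kernel allocators; struct nb, nb_prepare(),
 * nb_starting() and nb_dead() are names local to the sketch.
 */
#if 0
#include <stdlib.h>

struct nb {
	int id;
	int refcnt;
};

/* cpu_prepare: allocate a spare with an invalid id */
static struct nb *nb_prepare(void)
{
	struct nb *nb = calloc(1, sizeof(*nb));

	if (nb)
		nb->id = -1;
	return nb;
}

/* cpu_starting: adopt the published descriptor for this id, or keep ours */
static struct nb *nb_starting(struct nb *spare, struct nb *published, int id)
{
	struct nb *nb = published ? published : spare;

	if (nb != spare)
		free(spare);	/* someone on this node beat us to it */
	nb->id = id;
	nb->refcnt++;
	return nb;
}

/* cpu_dead: unused spares and the last reference free the object */
static void nb_dead(struct nb *nb)
{
	if (nb && (nb->id == -1 || --nb->refcnt == 0))
		free(nb);
}
#endif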

static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 4,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,
};
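
/*
 * Illustrative sketch (not part of this file, guarded out of the build) of
 * the arithmetic behind cntval_bits/cntval_mask/max_period above: the
 * 48-bit counter is programmed to (2^48 - period) so it overflows after
 * 'period' events, and capping the period at 2^47 - 1 keeps bit 47 set
 * until the counter wraps, which is how overflow is detected ("use highest
 * bit to detect overflow").  The macro and function names below are local
 * to the sketch.
 */
#if 0
#include <stdint.h>
#include <assert.h>

#define CNTVAL_BITS	48
#define CNTVAL_MASK	((1ULL << CNTVAL_BITS) - 1)
#define MAX_PERIOD	((1ULL << (CNTVAL_BITS - 1)) - 1)

/* value written to the counter so it overflows after 'period' events */
static uint64_t program_counter(uint64_t period)
{
	assert(period > 0 && period <= MAX_PERIOD);
	return (uint64_t)(-(int64_t)period) & CNTVAL_MASK;
}

/* overflow has happened once the top counter bit has cleared */
static int overflowed(uint64_t counter)
{
	return !(counter & (1ULL << (CNTVAL_BITS - 1)));
}

int main(void)
{
	uint64_t v = program_counter(MAX_PERIOD);

	assert(!overflowed(v));				/* bit 47 starts set  */
	assert(overflowed((v + MAX_PERIOD) & CNTVAL_MASK));	/* clears on wrap */
	return 0;
}
#endif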

static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
	return 0;
}

#endif