/*
 * Netburst Performance Events (P4, old Xeon)
 *
 *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 *  Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 *  For licensing details see kernel-base/COPYING
 */

#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>

#define P4_CNTR_LIMIT 3
/*
 * array indices: 0,1 - HT threads, used on HT-enabled cpus
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	char cntr[2][P4_CNTR_LIMIT];		/* counter index (offset), -1 on absence */
};

struct p4_pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};

/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = vert,				\
	}

/*
 * note that P4_PEBS_ENABLE_UOP_TAG is always set here
 *
 * this table maps the P4_PEBS_CONFIG_METRIC_MASK bits of the
 * event configuration to the values that are to be written
 * into the MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
 * registers
 */
static struct p4_pebs_bind p4_pebs_bind_map[] = {
	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,	0x0000001, 0x0000001),
	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,	0x0000002, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_load_miss_retired,	0x0000004, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_store_miss_retired,	0x0000004, 0x0000002),
	P4_GEN_PEBS_BIND(dtlb_all_miss_retired,		0x0000004, 0x0000003),
	P4_GEN_PEBS_BIND(tagged_mispred_branch,		0x0018000, 0x0000010),
	P4_GEN_PEBS_BIND(mob_load_replay_retired,	0x0000200, 0x0000001),
	P4_GEN_PEBS_BIND(split_load_retired,		0x0000400, 0x0000001),
	P4_GEN_PEBS_BIND(split_store_retired,		0x0000400, 0x0000002),
};
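
/*
 * For example (values from the table above): an event whose config carries
 * P4_PEBS_METRIC__dtlb_all_miss_retired makes p4_pmu_enable_pebs() write
 * 0x0000004 | P4_PEBS_ENABLE_UOP_TAG into MSR_IA32_PEBS_ENABLE and
 * 0x0000003 into MSR_P4_PEBS_MATRIX_VERT.
 */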

static struct p4_event_bind p4_event_bind_map[] = {
	[P4_EVENT_TC_DELIVER_MODE] = {
		.opcode		= P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_BPU_FETCH_REQUEST] = {
		.opcode		= P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
		.escr_msr	= { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_ITLB_REFERENCE] = {
		.opcode		= P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
		.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_MEMORY_CANCEL] = {
		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MEMORY_COMPLETE] = {
		.opcode		= P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_LOAD_PORT_REPLAY] = {
		.opcode		= P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_STORE_PORT_REPLAY] = {
		.opcode		= P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
		.escr_msr	= { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MOB_LOAD_REPLAY] = {
		.opcode		= P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
		.escr_msr	= { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_PAGE_WALK_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
		.escr_msr	= { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
		.opcode		= P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ALLOCATION] = {
		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode		= P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
		.escr_msr	= { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
		.cntr		= { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_FSB_DATA_ACTIVITY] = {
		.opcode		= P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
		.opcode		= P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
		.cntr		= { {0, -1, -1}, {1, -1, -1} },
	},
	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode		= P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
		.escr_msr	= { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
		.cntr		= { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_SSE_INPUT_ASSIST] = {
		.opcode		= P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_SP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_DP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_SP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_DP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_64BIT_MMX_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_128BIT_MMX_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_X87_FP_UOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_X87_FP_UOP),
		.escr_msr	= { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_TC_MISC] = {
		.opcode		= P4_OPCODE(P4_EVENT_TC_MISC),
		.escr_msr	= { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
		.opcode		= P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_TC_MS_XFER] = {
		.opcode		= P4_OPCODE(P4_EVENT_TC_MS_XFER),
		.escr_msr	= { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_UOP_QUEUE_WRITES] = {
		.opcode		= P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
		.escr_msr	= { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
		.escr_msr	= { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
		.escr_msr	= { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.cntr		= { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RESOURCE_STALL] = {
		.opcode		= P4_OPCODE(P4_EVENT_RESOURCE_STALL),
		.escr_msr	= { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_WC_BUFFER] = {
		.opcode		= P4_OPCODE(P4_EVENT_WC_BUFFER),
		.escr_msr	= { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr		= { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_B2B_CYCLES] = {
		.opcode		= P4_OPCODE(P4_EVENT_B2B_CYCLES),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BNR] = {
		.opcode		= P4_OPCODE(P4_EVENT_BNR),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_SNOOP] = {
		.opcode		= P4_OPCODE(P4_EVENT_SNOOP),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_RESPONSE] = {
		.opcode		= P4_OPCODE(P4_EVENT_RESPONSE),
		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr		= { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_FRONT_END_EVENT] = {
		.opcode		= P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_EXECUTION_EVENT] = {
		.opcode		= P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_REPLAY_EVENT] = {
		.opcode		= P4_OPCODE(P4_EVENT_REPLAY_EVENT),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_INSTR_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOPS_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_UOPS_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOP_TYPE] = {
		.opcode		= P4_OPCODE(P4_EVENT_UOP_TYPE),
		.escr_msr	= { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_BRANCH_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_X87_ASSIST] = {
		.opcode		= P4_OPCODE(P4_EVENT_X87_ASSIST),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MACHINE_CLEAR] = {
		.opcode		= P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
		.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_COMPLETED] = {
		.opcode		= P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr		= { {12, 13, 16}, {14, 15, 17} },
	},
};
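
/*
 * Layout note: cntr[0][] lists the counter indices usable when the event
 * runs on HT thread 0 and cntr[1][] the ones for HT thread 1; a -1 entry
 * means no further counter is available (see p4_next_cntr() below).
 */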

#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \
	p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			    P4_ESCR_EMASK_BIT(event, bit))		| \
	p4_config_pack_cccr(metric					| \
			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
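
/*
 * P4_GEN_CACHE_EVENT() builds a complete config value for a cache event:
 * the ESCR half carries the event code and the event mask bit, while the
 * CCCR half carries the PEBS metric index plus the ESCR select derived
 * from the event opcode.
 */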

static __initconst const u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__1stl_cache_load_miss_retired),
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_load_miss_retired),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_store_miss_retired),
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
						P4_PEBS_METRIC__none),
		[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
						P4_PEBS_METRIC__none),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
  /* non-halted CPU clocks */
  [PERF_COUNT_HW_CPU_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),

  /*
   * retired instructions
   * for the sake of simplicity we don't use the FSB tagging
   */
  [PERF_COUNT_HW_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),

  /* cache hits */
  [PERF_COUNT_HW_CACHE_REFERENCES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),

  /* cache misses */
  [PERF_COUNT_HW_CACHE_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),

  /* branch instructions retired */
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),

  /* mispredicted branches retired */
  [PERF_COUNT_HW_BRANCH_MISSES]	=
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),

  /* bus ready clocks (cpu is driving #DRDY_DRV or #DRDY_OWN): */
  [PERF_COUNT_HW_BUS_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))	|
	p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
};
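
/*
 * Each entry above packs only the ESCR event code and event mask;
 * p4_pmu_event_map() below adds the matching ESCR select into the CCCR
 * half of the config, using the opcode from p4_event_bind_map.
 */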

static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	unsigned int evnt = p4_config_unpack_event(config);
	struct p4_event_bind *bind = NULL;

	if (evnt < ARRAY_SIZE(p4_event_bind_map))
		bind = &p4_event_bind_map[evnt];

	return bind;
}

static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_bind *bind;
	unsigned int esel;
	u64 config;

	config = p4_general_events[hw_event];
	bind = p4_config_get_bind(config);
	esel = P4_OPCODE_ESEL(bind->opcode);
	config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));

	return config;
}

static int p4_validate_raw_event(struct perf_event *event)
{
	unsigned int v;

	/* user data may have an out-of-bounds event index */
	v = p4_config_unpack_event(event->attr.config);
	if (v >= ARRAY_SIZE(p4_event_bind_map)) {
		pr_warning("P4 PMU: Unknown event code: %d\n", v);
		return -EINVAL;
	}

	/*
	 * it may have some bogus PEBS bits
	 */
	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
		pr_warning("P4 PMU: PEBS are not supported yet\n");
		return -EINVAL;
	}
	v = p4_config_unpack_metric(event->attr.config);
	if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
		pr_warning("P4 PMU: Unknown metric code: %d\n", v);
		return -EINVAL;
	}

	return 0;
}

static int p4_hw_config(struct perf_event *event)
{
	int cpu = get_cpu();
	int rc = 0;
	u32 escr, cccr;

	/*
	 * the reason we take the cpu this early is that if the event gets
	 * scheduled for the first time on the same cpu, we will not need to
	 * swap the thread-specific flags in the config (and will save some
	 * cpu cycles)
	 */

	cccr = p4_default_cccr_conf(cpu);
	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
					 event->attr.exclude_user);
	event->hw.config = p4_config_pack_escr(escr) |
			   p4_config_pack_cccr(cccr);

	if (p4_ht_active() && p4_ht_thread(cpu))
		event->hw.config = p4_set_ht_bit(event->hw.config);

	if (event->attr.type == PERF_TYPE_RAW) {

		rc = p4_validate_raw_event(event);
		if (rc)
			goto out;

		event->hw.config |= event->attr.config &
			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));

		event->hw.config &= ~P4_CCCR_FORCE_OVF;
	}

	rc = x86_setup_perfctr(event);
out:
	put_cpu();
	return rc;
}

static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	int overflow = 0;
	u32 low, high;

	rdmsr(hwc->config_base + hwc->idx, low, high);

	/* we need to check high bit for unflagged overflows */
	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
		overflow = 1;
		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
			((u64)low) & ~P4_CCCR_OVF);
	}

	return overflow;
}

static void p4_pmu_disable_pebs(void)
{
}

static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in an overflowed
	 * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
	 * asserted again and again
	 */
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) &
			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}

static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}

	p4_pmu_disable_pebs();
}

/* configuration must be valid */
static void p4_pmu_enable_pebs(u64 config)
{
	struct p4_pebs_bind *bind;
	unsigned int idx;

	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);

	idx = p4_config_unpack_metric(config);
	if (idx == P4_PEBS_METRIC__none)
		return;

	bind = &p4_pebs_bind_map[idx];

	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs);
	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);
}

static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	unsigned int idx = p4_config_unpack_event(hwc->config);
	struct p4_event_bind *bind;
	u64 escr_addr, cccr;

	bind = &p4_event_bind_map[idx];
	escr_addr = (u64)bind->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	/* we need a real Event value */
	escr_conf &= ~P4_ESCR_EVENT_MASK;
	escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));

	cccr = p4_config_unpack_cccr(hwc->config);

	/*
	 * it could be a cache event, so we need to write the metrics
	 * into the additional MSRs
	 */
	p4_pmu_enable_pebs(hwc->config);

	(void)checking_wrmsrl(escr_addr, escr_conf);
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
				(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
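
/*
 * Enabling happens in three steps: the PEBS metric MSRs are programmed
 * first (when the event carries a metric), then the per-thread ESCR is
 * written, and finally the CCCR is written with P4_CCCR_ENABLE set, so
 * the counter only starts once the event is fully configured.
 */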

static void p4_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_enable_event(event);
	}
}

static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		int overflow;

		if (!test_bit(idx, cpuc->active_mask)) {
			/* catch in-flight IRQs */
			if (__test_and_clear_bit(idx, cpuc->running))
				handled++;
			continue;
		}

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/* it might be an unflagged overflow */
		overflow = p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
			continue;

		handled += overflow;

		/* the event has overflowed for sure */
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;
		if (perf_event_overflow(event, 1, &data, regs))
			p4_pmu_disable_event(event);
	}

	if (handled) {
		/* p4 quirk: unmask it again */
		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
		inc_irq_stat(apic_perf_irqs);
	}

	return handled;
}

/*
 * swap thread-specific fields according to the thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu, or there
	 * is no HT support
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event has migrated from another logical
	 * cpu, so we need to swap the thread-specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		hwc->config  = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_ESCR_T1_OS) {
			escr &= ~P4_ESCR_T1_OS;
			escr |= P4_ESCR_T0_OS;
		}
		if (escr & P4_ESCR_T1_USR) {
			escr &= ~P4_ESCR_T1_USR;
			escr |= P4_ESCR_T0_USR;
		}
		hwc->config  = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}
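
/*
 * In other words, when an event moves to HT thread 1 the T0 PMI, OS and
 * USR bits are rewritten as their T1 counterparts (and vice versa), and
 * P4_CONFIG_HT is updated, which appears to be what p4_should_swap_ts()
 * keys on when deciding whether the config still matches the thread it
 * is about to run on.
 */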

/*
 * ESCR address hashing is tricky: the ESCRs are not sequential
 * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and
 * the low byte of any ESCR address lies in the range [0xa0, 0xe1],
 * so we end up with a hash table that is roughly 70% filled
 */

#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		(msr - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = msr
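
/*
 * For example, MSR_P4_BSU_ESCR0 (0x3a0) maps to index 0 and P4_ESCR_MSR_MAX
 * (0x3e1) maps to the last slot; slots whose MSR is not listed below stay
 * zero, which lets p4_get_escr_idx() reject unknown addresses.
 */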

static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};

static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int idx = P4_ESCR_MSR_IDX(addr);

	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE	||
			!p4_escr_table[idx]		||
			p4_escr_table[idx] != addr)) {
		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
		return -1;
	}

	return idx;
}

static int p4_next_cntr(int thread, unsigned long *used_mask,
			struct p4_event_bind *bind)
{
	int i, j;

	for (i = 0; i < P4_CNTR_LIMIT; i++) {
		j = bind->cntr[thread][i];
		if (j != -1 && !test_bit(j, used_mask))
			return j;
	}

	return -1;
}

static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
	int cpu = smp_processor_id();
	struct hw_perf_event *hwc;
	struct p4_event_bind *bind;
	unsigned int i, thread, num;
	int cntr_idx, escr_idx;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);

	for (i = 0, num = n; i < n; i++, num--) {

		hwc = &cpuc->event_list[i]->hw;
		thread = p4_ht_thread(cpu);
		bind = p4_config_get_bind(hwc->config);
		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
		if (unlikely(escr_idx == -1))
			goto done;

		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			cntr_idx = hwc->idx;
			if (assign)
				assign[i] = hwc->idx;
			goto reserve;
		}

		cntr_idx = p4_next_cntr(thread, used_mask, bind);
		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
			goto done;

		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign)
			assign[i] = cntr_idx;
reserve:
		set_bit(cntr_idx, used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}
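
/*
 * Scheduling is a single greedy pass: each event needs both a free counter
 * from its bind->cntr[] list and a free ESCR for the current thread; an
 * event that already owns a counter and needs no HT swap keeps it.  If any
 * event cannot be placed, the whole group is rejected with -ENOSPC.
 */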

static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously,
	 * though leave it restricted for the moment
	 * assuming HT is on
	 */
	.num_counters		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.cntval_bits		= 40,
	.cntval_mask		= (1ULL << 40) - 1,
	.max_period		= (1ULL << 39) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
	.perfctr_second_write	= 1,
};

static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */