/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include <linux/oprofile.h>
#include <linux/smp.h>
#include <asm/msr.h>
#include <asm/ptrace.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "op_x86_model.h"
#include "op_counter.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

/* This has to be checked dynamically, since whether
   a chip is hyper-threaded is only discovered at
   kernel boot time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2) {
		num_counters = NUM_COUNTERS_HT2;
		num_controls = NUM_CONTROLS_HT2;
	}
#endif
}

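/* On an HT2 chip the two logical CPUs' registers are interleaved
   within each MSR bank, so walking "our" half of a bank means
   stepping by two; on a non-HT chip every register is ours and we
   step by one. */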
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}


/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;
	int counter_address;
	int cccr_address;
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR       */
	} bindings[2];
};
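
/* Each event can be counted on at most two virtual counters, one from
   each half of the register file; under HT each sibling thread uses
   its own half.  A { 0, 0 } binding in the table below means the
   event has no counter on that half. */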

/* NB: these CTR_* defines duplicate the defines in
   event/i386.p4*events. */


#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};

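/* bit 7 of MSR_IA32_MISC_ENABLE flags whether performance
   monitoring is available on this part */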
#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))
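/* ESCR layout, as encoded by the macros below: bits 0/1 are the
   USR/OS enables for the second logical processor and bits 2/3 the
   same for the first; bits 9-24 hold the event mask and bits 25-30
   the event select.  ESCR_CLEAR() drops everything except the
   reserved bits before we program a new event. */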
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
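/* CCCR layout, per the macros below: bit 12 enables the counter,
   bits 13-15 select the paired ESCR, bits 16-17 are the required
   bits (the active-thread field on HT parts), bits 26/27 direct the
   PMI to logical processor 0/1, and bit 31 is the overflow flag. */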
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1 << 12))
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U << 31)))
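/* The perfctrs count upward and raise an interrupt on overflow, so
   CTR_WRITE() programs the two's complement of the desired count
   (sign-extended into the high word); CTR_OVERFLOW_P() then treats a
   clear bit 31 in the low word as "wrapped past zero". */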
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))


/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return (cpu != first_cpu(cpu_sibling_map[cpu]));
#endif
	return 0;
}


/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
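/* e.g. on an HT2 system (num_counters == 4) the odd sibling has
   stagger 1, so VIRT_CTR(1, 0) == 4 and selects CTR_BPU_2, the
   second-thread twin of CTR_BPU_0, from p4_counters[] above. */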

static unsigned long reset_value[NUM_COUNTERS_NON_HT];


static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* initialize some registers */
	for (i = 0; i < num_counters; ++i) {
		msrs->counters[i].addr = 0;
	}
	for (i = 0; i < num_controls; ++i) {
		msrs->controls[i].addr = 0;
	}

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models; on those we reserve BSU_ESCR0/1
	 * a second time, to avoid a special case in
	 * nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}
}


static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			ESCR_WRITE(escr, high, ev, i);

			/* modify CCCR */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0) {
				CCCR_SET_PMI_OVF_0(cccr);
			} else {
				CCCR_SET_PMI_OVF_1(cccr);
			}
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}


static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
		} else {
			/* 0 marks the counter unused; start/stop/check
			   skip it */
			reset_value[i] = 0;
		}
	}
}


static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			oprofile_add_sample(regs, i);
			/* re-arm the counter, clear the overflow flag,
			   then re-arm again once OVF is clear */
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			CTR_WRITE(reset_value[i], real);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}


static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_ENABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}


static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (CTR_IS_RESERVED(msrs, i))
			release_perfctr_nmi(msrs->counters[i].addr);
	}
	/* some of the control registers are specially reserved in
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters; i < num_controls; ++i) {
		if (CTRL_IS_RESERVED(msrs, i))
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}


#ifdef CONFIG_SMP
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};
#endif

struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};
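
/* NB: nothing in this file picks a spec; the expectation (see the P4
   init path in nmi_int.c) is that op_p4_ht2_spec is used when
   smp_num_siblings == 2 and op_p4_spec otherwise. */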