/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>

#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>

unsigned int nmi_watchdog = NMI_NONE;
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
extern void show_registers(struct pt_regs *regs);

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

#define MSR_P4_MISC_ENABLE	0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
#define MSR_P4_PERFCTR0		0x300
#define MSR_P4_CCCR0		0x360
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI		(1<<26)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0	0x30C
#define MSR_P4_IQ_CCCR0		0x36C
#define MSR_P4_CRU_ESCR0	0x3B8
#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0	\
	(P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)

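/*
 * Boot-time sanity check: snapshot each CPU's NMI count, busy-wait for
 * roughly ten watchdog periods, then verify that every CPU's count has
 * advanced.  A CPU whose count has barely moved has a non-functional
 * watchdog source, so report it and fail.
 */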
int __init check_nmi_watchdog (void)
{
	irq_cpustat_t tmp[NR_CPUS];
	int j, cpu;

	printk(KERN_INFO "testing NMI watchdog ... ");

	memcpy(tmp, irq_stat, sizeof(tmp));
	sti();
	mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

	for (j = 0; j < smp_num_cpus; j++) {
		cpu = cpu_logical_map(j);
		if (nmi_count(cpu) - tmp[cpu].__nmi_count <= 5) {
			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
			return -1;
		}
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	return 0;
}

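/*
 * "nmi_watchdog=N" boot parameter.  N selects the watchdog source:
 * NMI_NONE, NMI_IO_APIC or NMI_LOCAL_APIC (conventionally 0, 1 and 2;
 * see the header that defines them).  The local APIC variant is only
 * accepted on CPU types we know how to program below.
 */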
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if (nmi >= NMI_INVALID)
		return 0;
	if (nmi == NMI_NONE)
		nmi_watchdog = nmi;
	/*
	 * If any other x86 CPU has a local APIC, then
	 * please test the NMI stuff there and send me the
	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
	 */
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
		nmi_watchdog = nmi;
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
			(boot_cpu_data.x86 == 6))
		nmi_watchdog = nmi;
	/*
	 * We can enable the IO-APIC watchdog
	 * unconditionally.
	 */
	if (nmi == NMI_IO_APIC)
		nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

#ifdef CONFIG_PM

#include <linux/pm.h>

struct pm_dev *nmi_pmdev;

static void disable_apic_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
		break;
	case X86_VENDOR_INTEL:
		switch (boot_cpu_data.x86) {
		case 6:
			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
			break;
		case 15:
			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
			break;
		}
		break;
	}
}

static int nmi_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	switch (rqst) {
	case PM_SUSPEND:
		disable_apic_nmi_watchdog();
		break;
	case PM_RESUME:
		setup_apic_nmi_watchdog();
		break;
	}
	return 0;
}

static void nmi_pm_init(void)
{
	if (!nmi_pmdev)
		nmi_pmdev = apic_pm_register(PM_SYS_DEV, 0, nmi_pm_callback);
}

#define __pminit	/*empty*/

#else	/* CONFIG_PM */

static inline void nmi_pm_init(void) { }

#define __pminit	__init

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void __pminit clear_msr_range(unsigned int base, unsigned int n)
{
	unsigned int i;

	for(i = 0; i < n; ++i)
		wrmsr(base+i, 0, 0);
}

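/*
 * The per-vendor setup routines below all follow the same pattern:
 * pick a performance counter event that ticks while the CPU is not
 * halted, route the counter overflow to the local APIC's LVTPC entry
 * as an NMI, and preload the counter with -(cycles per watchdog
 * period) so that it overflows about nmi_hz times per second.
 */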
static void __pminit setup_k7_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_K7_PERFCTR0;

	clear_msr_range(MSR_K7_EVNTSEL0, 4);
	clear_msr_range(MSR_K7_PERFCTR0, 4);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

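	/*
	 * cpu_khz is the CPU clock in kHz, so cpu_khz/nmi_hz*1000 is the
	 * number of cycles in one watchdog period.  For example, on a
	 * 500 MHz CPU with nmi_hz == 100 the counter is preloaded with
	 * -5,000,000 and overflows (raising the NMI) roughly every 10 ms.
	 * The P6 and P4 paths below use the same reload value.
	 */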
	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}

static void __pminit setup_p6_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_P6_PERFCTR0;

	clear_msr_range(MSR_P6_EVNTSEL0, 2);
	clear_msr_range(MSR_P6_PERFCTR0, 2);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
	Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
}

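/*
 * The Pentium 4 needs extra care: performance monitoring must be
 * advertised as available via MSR_P4_MISC_ENABLE, the ESCR/CCCR
 * register banks are first cleared to a known state, and IQ_COUNTER0
 * is then driven by CRU_ESCR0 through IQ_CCCR0, configured as
 * described above so that the counter effectively counts clock cycles.
 */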
static int __pminit setup_p4_watchdog(void)
{
	unsigned int misc_enable, dummy;

	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;

	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
		clear_msr_range(0x3F1, 2);
	/* MSR 0x3F0 seems to have a default value of 0xFC00, but the
	   current docs don't fully define it, so leave it alone for now. */
	clear_msr_range(0x3A0, 31);
	clear_msr_range(0x3C0, 6);
	clear_msr_range(0x3C8, 6);
	clear_msr_range(0x3E0, 2);
	clear_msr_range(MSR_P4_CCCR0, 18);
	clear_msr_range(MSR_P4_PERFCTR0, 18);

	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
	Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
	return 1;
}

void __pminit setup_apic_nmi_watchdog (void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6)
			return;
		setup_k7_watchdog();
		break;
	case X86_VENDOR_INTEL:
		switch (boot_cpu_data.x86) {
		case 6:
			setup_p6_watchdog();
			break;
		case 15:
			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
		break;
	default:
		return;
	}
	nmi_pm_init();
}

static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

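/*
 * Code that legitimately runs for a long time with interrupts disabled
 * (and would therefore look "stuck" to nmi_watchdog_tick() below) is
 * expected to call touch_nmi_watchdog() every now and then to avoid a
 * false lockup report.
 */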
void touch_nmi_watchdog (void)
{
	int i;

	/*
	 * Just reset the alert counters (other CPUs might be
	 * spinning on locks we hold):
	 */
	for (i = 0; i < smp_num_cpus; i++)
		alert_counter[i] = 0;
}

void nmi_watchdog_tick (struct pt_regs * regs)
{

	/*
	 * Since current-> is always on the stack, and we always switch
	 * the stack NMI-atomically, it's safe to use smp_processor_id().
	 */
	int sum, cpu = smp_processor_id();

	sum = apic_timer_irqs[cpu];

	if (last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz) {
			spin_lock(&nmi_print_lock);
			/*
			 * We are in trouble anyway, let's at least try
			 * to get a message out.
			 */
			bust_spinlocks(1);
			printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip);
			show_registers(regs);
			printk("console shuts up ...\n");
			console_silent();
			spin_unlock(&nmi_print_lock);
			bust_spinlocks(0);
			do_exit(SIGSEGV);
		}
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	if (nmi_perfctr_msr) {
		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
			/*
			 * P4 quirks:
			 * - An overflown perfctr will assert its interrupt
			 *   until the OVF flag in its CCCR is cleared.
			 * - LVTPC is masked on interrupt and must be
			 *   unmasked by the LVTPC handler.
			 */
			wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
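		/*
		 * Reload the counter with -(cycles per watchdog period)
		 * so the next NMI arrives one period from now.
		 */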
		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
	}
}