/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson : Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>

#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>

/* Watchdog mode selected via the "nmi_watchdog=" boot option (see below). */
unsigned int nmi_watchdog = NMI_NONE;
/* Target watchdog NMI rate; starts at HZ, lowered to 1 once the test passes. */
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
extern void show_registers(struct pt_regs *regs);

/* AMD K7 performance-counter event-select bits and the event we count. */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

/* Intel P6-family event-select bits and the event we count. */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

/* Pentium 4 performance-monitoring MSRs and field encodings. */
#define MSR_P4_MISC_ENABLE	0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
#define MSR_P4_PERFCTR0		0x300
#define MSR_P4_CCCR0		0x360
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI		(1<<26)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0	0x30C
#define MSR_P4_IQ_CCCR0		0x36C
#define MSR_P4_CRU_ESCR0	0x3B8
#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0	\
	(P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)

/*
 * Boot-time sanity check: wait ~10 watchdog ticks with interrupts enabled
 * and verify that every CPU's NMI count advanced.  Returns 0 on success,
 * -1 if some CPU's NMI source appears dead.  On success the NMI rate is
 * dropped to 1 Hz for the local-APIC watchdog to cut overhead.
 */
int __init check_nmi_watchdog (void)
{
	irq_cpustat_t tmp[NR_CPUS];
	int j, cpu;

	printk(KERN_INFO "testing NMI watchdog ... ");

	/* snapshot per-CPU NMI counts before the delay */
	memcpy(tmp, irq_stat, sizeof(tmp));
	sti();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for (j = 0; j < smp_num_cpus; j++) {
		cpu = cpu_logical_map(j);
		/* expect at least ~10 new NMIs; <= 5 means stuck */
		if (nmi_count(cpu) - tmp[cpu].__nmi_count <= 5) {
			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
			return -1;
		}
	}
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	return 0;
}

/*
 * Parse the "nmi_watchdog=" boot option.  NMI_LOCAL_APIC is only honoured
 * on CPUs we know how to program (Intel P6/P4, AMD K7); NMI_IO_APIC is
 * accepted on anything.
 */
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if (nmi >= NMI_INVALID)
		return 0;
	if (nmi == NMI_NONE)
		nmi_watchdog = nmi;
	/*
	 * If any other x86 CPU has a local APIC, then
	 * please test the NMI stuff there and send me the
	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
	 */
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
		nmi_watchdog = nmi;
	if ((nmi == NMI_LOCAL_APIC) &&
			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
			(boot_cpu_data.x86 == 6))
		nmi_watchdog = nmi;
	/*
	 * We can enable the IO-APIC watchdog
	 * unconditionally.
	 */
	if (nmi == NMI_IO_APIC)
		nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);

#ifdef CONFIG_PM

#include <linux/pm.h>

struct pm_dev *nmi_pmdev;

/*
 * Stop the perfctr-driven NMI source for suspend: zeroing the event-select
 * (or, on P4, the CCCR and ESCR) disables the counter interrupt.
 */
static void disable_apic_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
		break;
	case X86_VENDOR_INTEL:
		switch (boot_cpu_data.x86) {
		case 6:
			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
			break;
		case 15:
			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
			break;
		}
		break;
	}
}

/* Power-management hook: tear down the watchdog on suspend, rearm on resume. */
static int nmi_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	switch (rqst) {
	case PM_SUSPEND:
		disable_apic_nmi_watchdog();
		break;
	case PM_RESUME:
		setup_apic_nmi_watchdog();
		break;
	}
	return 0;
}

/* Register the PM callback once (idempotent via the nmi_pmdev check). */
static void nmi_pm_init(void)
{
	if (!nmi_pmdev)
		nmi_pmdev = apic_pm_register(PM_SYS_DEV, 0, nmi_pm_callback);
}

/* With PM the setup path can run again at resume, so it must not be __init. */
#define __pminit	/*empty*/

#else	/* CONFIG_PM */

static inline void nmi_pm_init(void) { }

#define __pminit	__init

#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

/* Zero n consecutive MSRs starting at base. */
static void __pminit clear_msr_range(unsigned int base, unsigned int n)
{
	unsigned int i;

	for(i = 0; i < n; ++i)
		wrmsr(base+i, 0, 0);
}

/*
 * Program AMD K7 perfctr 0 to count "processor running" cycles and raise
 * an NMI (via the LVTPC entry) on overflow, roughly nmi_hz times a second.
 * The counter is loaded with -(cycles per period) so it overflows upward.
 */
static void __pminit setup_k7_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_K7_PERFCTR0;

	clear_msr_range(MSR_K7_EVNTSEL0, 4);
	clear_msr_range(MSR_K7_PERFCTR0, 4);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* configure first, enable last: counter is armed before ENABLE is set */
	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}

/*
 * Same scheme for Intel P6-family: perfctr 0 counts unhalted clocks and
 * overflows into an NMI about nmi_hz times a second.
 */
static void __pminit setup_p6_watchdog(void)
{
	unsigned int evntsel;

	nmi_perfctr_msr = MSR_P6_PERFCTR0;

	clear_msr_range(MSR_P6_EVNTSEL0, 2);
	clear_msr_range(MSR_P6_PERFCTR0, 2);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* configure first, enable last */
	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
	Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
}

/*
 * Pentium 4: make IQ_COUNTER0 act as a free-running clock (see the comment
 * at the P4_NMI_* definitions above) and fire a PMI-as-NMI on overflow.
 * Returns 0 if performance monitoring is unavailable, 1 on success.
 */
static int __pminit setup_p4_watchdog(void)
{
	unsigned int misc_enable, dummy;

	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;

	/* clear the PEBS control MSRs only when PEBS is actually present */
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
		clear_msr_range(0x3F1, 2);
	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
	   docs doesn't fully define it, so leave it alone for now. */
	/* clear all ESCR/CCCR/counter state (ranges per the P4 MSR map) */
	clear_msr_range(0x3A0, 31);
	clear_msr_range(0x3C0, 6);
	clear_msr_range(0x3C8, 6);
	clear_msr_range(0x3E0, 2);
	clear_msr_range(MSR_P4_CCCR0, 18);
	clear_msr_range(MSR_P4_PERFCTR0, 18);

	/* program ESCR and CCCR with ENABLE clear, arm the counter, then enable */
	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
	Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
	wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
	return 1;
}

/*
 * Dispatch to the per-CPU-family setup routine, then (with CONFIG_PM)
 * register the suspend/resume hook.  Unsupported CPUs return silently,
 * leaving nmi_perfctr_msr zero.
 */
void __pminit setup_apic_nmi_watchdog (void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6)
			return;
		setup_k7_watchdog();
		break;
	case X86_VENDOR_INTEL:
		switch (boot_cpu_data.x86) {
		case 6:
			setup_p6_watchdog();
			break;
		case 15:
			if (!setup_p4_watchdog())
				return;
			break;
		default:
			return;
		}
		break;
	default:
		return;
	}
	nmi_pm_init();
}

/* Serializes the lockup report so two CPUs don't interleave their oopses. */
static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;

/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check it's local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs dont listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 * here too!]
 */

/* Per-CPU: last observed APIC timer IRQ count, and ticks spent unchanged. */
static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];

/*
 * Tell the watchdog the system is still alive.  Called by code that
 * legitimately keeps other CPUs spinning for a long time.
 */
void touch_nmi_watchdog (void)
{
	int i;

	/*
	 * Just reset the alert counters, (other CPUs might be
	 * spinning on locks we hold):
	 */
	for (i = 0; i < smp_num_cpus; i++)
		alert_counter[i] = 0;
}

/*
 * Per-NMI tick: declare a lockup (and oops) if this CPU's APIC timer IRQ
 * count has not moved for 5 seconds' worth of watchdog NMIs, then rearm
 * the perfctr so the next NMI arrives.  Runs in NMI context.
 */
void nmi_watchdog_tick (struct pt_regs * regs)
{

	/*
	 * Since current-> is always on the stack, and we always switch
	 * the stack NMI-atomically, it's safe to use smp_processor_id().
	 */
	int sum, cpu = smp_processor_id();

	sum = apic_timer_irqs[cpu];

	if (last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz) {
			spin_lock(&nmi_print_lock);
			/*
			 * We are in trouble anyway, lets at least try
			 * to get a message out.
			 */
			bust_spinlocks(1);
			printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip);
			show_registers(regs);
			printk("console shuts up ...\n");
			console_silent();
			spin_unlock(&nmi_print_lock);
			bust_spinlocks(0);
			do_exit(SIGSEGV);
		}
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	if (nmi_perfctr_msr) {
		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
			/*
			 * P4 quirks:
			 * - An overflown perfctr will assert its interrupt
			 *   until the OVF flag in its CCCR is cleared.
			 * - LVTPC is masked on interrupt and must be
			 *   unmasked by the LVTPC handler.
			 */
			wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
			apic_write(APIC_LVTPC, APIC_DM_NMI);
		}
		/* reload the down-counter for the next watchdog period */
		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
	}
}