1/* 2 * linux/arch/i386/nmi.c 3 * 4 * NMI watchdog support on APIC systems 5 * 6 * Started by Ingo Molnar <mingo@redhat.com> 7 * 8 * Fixes: 9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. 10 * Mikael Pettersson : Power Management for local APIC NMI watchdog. 11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. 12 * Pavel Machek and 13 * Mikael Pettersson : PM converted to driver model. Disable/enable API. 14 */ 15 16#include <linux/delay.h> 17#include <linux/interrupt.h> 18#include <linux/module.h> 19#include <linux/nmi.h> 20#include <linux/sysdev.h> 21#include <linux/sysctl.h> 22#include <linux/percpu.h> 23#include <linux/kprobes.h> 24#include <linux/cpumask.h> 25#include <linux/kernel_stat.h> 26#include <linux/kdebug.h> 27 28#include <asm/smp.h> 29#include <asm/nmi.h> 30 31#include "mach_traps.h" 32 33int unknown_nmi_panic; 34int nmi_watchdog_enabled; 35 36static cpumask_t backtrace_mask = CPU_MASK_NONE; 37 38/* nmi_active: 39 * >0: the lapic NMI watchdog is active, but can be disabled 40 * <0: the lapic NMI watchdog has not been set up, and cannot 41 * be enabled 42 * 0: the lapic NMI watchdog is disabled, but can be enabled 43 */ 44atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ 45 46unsigned int nmi_watchdog = NMI_DEFAULT; 47static unsigned int nmi_hz = HZ; 48 49static DEFINE_PER_CPU(short, wd_enabled); 50 51/* local prototypes */ 52static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); 53 54static int endflag __initdata = 0; 55 56#ifdef CONFIG_SMP 57/* The performance counters used by NMI_LOCAL_APIC don't trigger when 58 * the CPU is idle. To make sure the NMI watchdog really ticks on all 59 * CPUs during the test make them busy. 60 */ 61static __init void nmi_cpu_busy(void *data) 62{ 63 local_irq_enable_in_hardirq(); 64 /* Intentionally don't use cpu_relax here. This is 65 to make sure that the performance counter really ticks, 66 even if there is a simulator or similar that catches the 67 pause instruction. On a real HT machine this is fine because 68 all other CPUs are busy with "useless" delay loops and don't 69 care if they get somewhat less cycles. */ 70 while (endflag == 0) 71 mb(); 72} 73#endif 74 75static int __init check_nmi_watchdog(void) 76{ 77 unsigned int *prev_nmi_count; 78 int cpu; 79 80 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) 81 return 0; 82 83 if (!atomic_read(&nmi_active)) 84 return 0; 85 86 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 87 if (!prev_nmi_count) 88 return -1; 89 90 printk(KERN_INFO "Testing NMI watchdog ... "); 91 92 if (nmi_watchdog == NMI_LOCAL_APIC) 93 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); 94 95 for_each_possible_cpu(cpu) 96 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; 97 local_irq_enable(); 98 mdelay((20*1000)/nmi_hz); // wait 20 ticks 99 100 for_each_possible_cpu(cpu) { 101#ifdef CONFIG_SMP 102 /* Check cpu_callin_map here because that is set 103 after the timer is started. */ 104 if (!cpu_isset(cpu, cpu_callin_map)) 105 continue; 106#endif 107 if (!per_cpu(wd_enabled, cpu)) 108 continue; 109 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 110 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 111 cpu, 112 prev_nmi_count[cpu], 113 nmi_count(cpu)); 114 per_cpu(wd_enabled, cpu) = 0; 115 atomic_dec(&nmi_active); 116 } 117 } 118 if (!atomic_read(&nmi_active)) { 119 kfree(prev_nmi_count); 120 atomic_set(&nmi_active, -1); 121 return -1; 122 } 123 endflag = 1; 124 printk("OK.\n"); 125 126 /* now that we know it works we can reduce NMI frequency to 127 something more reasonable; makes a difference in some configs */ 128 if (nmi_watchdog == NMI_LOCAL_APIC) 129 nmi_hz = lapic_adjust_nmi_hz(1); 130 131 kfree(prev_nmi_count); 132 return 0; 133} 134/* This needs to happen later in boot so counters are working */ 135late_initcall(check_nmi_watchdog); 136 137static int __init setup_nmi_watchdog(char *str) 138{ 139 int nmi; 140 141 get_option(&str, &nmi); 142 143 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) 144 return 0; 145 146 nmi_watchdog = nmi; 147 return 1; 148} 149 150__setup("nmi_watchdog=", setup_nmi_watchdog); 151 152 153/* Suspend/resume support */ 154 155#ifdef CONFIG_PM 156 157static int nmi_pm_active; /* nmi_active before suspend */ 158 159static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 160{ 161 /* only CPU0 goes here, other CPUs should be offline */ 162 nmi_pm_active = atomic_read(&nmi_active); 163 stop_apic_nmi_watchdog(NULL); 164 BUG_ON(atomic_read(&nmi_active) != 0); 165 return 0; 166} 167 168static int lapic_nmi_resume(struct sys_device *dev) 169{ 170 /* only CPU0 goes here, other CPUs should be offline */ 171 if (nmi_pm_active > 0) { 172 setup_apic_nmi_watchdog(NULL); 173 touch_nmi_watchdog(); 174 } 175 return 0; 176} 177 178 179static struct sysdev_class nmi_sysclass = { 180 set_kset_name("lapic_nmi"), 181 .resume = lapic_nmi_resume, 182 .suspend = lapic_nmi_suspend, 183}; 184 185static struct sys_device device_lapic_nmi = { 186 .id = 0, 187 .cls = &nmi_sysclass, 188}; 189 190static int __init init_lapic_nmi_sysfs(void) 191{ 192 int error; 193 194 /* should really be a BUG_ON but b/c this is an 195 * init call, it just doesn't work. -dcz 196 */ 197 if (nmi_watchdog != NMI_LOCAL_APIC) 198 return 0; 199 200 if (atomic_read(&nmi_active) < 0) 201 return 0; 202 203 error = sysdev_class_register(&nmi_sysclass); 204 if (!error) 205 error = sysdev_register(&device_lapic_nmi); 206 return error; 207} 208/* must come after the local APIC's device_initcall() */ 209late_initcall(init_lapic_nmi_sysfs); 210 211#endif /* CONFIG_PM */ 212 213static void __acpi_nmi_enable(void *__unused) 214{ 215 apic_write_around(APIC_LVT0, APIC_DM_NMI); 216} 217 218/* 219 * Enable timer based NMIs on all CPUs: 220 */ 221void acpi_nmi_enable(void) 222{ 223 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 224 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); 225} 226 227static void __acpi_nmi_disable(void *__unused) 228{ 229 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); 230} 231 232/* 233 * Disable timer based NMIs on all CPUs: 234 */ 235void acpi_nmi_disable(void) 236{ 237 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 238 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); 239} 240 241void setup_apic_nmi_watchdog (void *unused) 242{ 243 if (__get_cpu_var(wd_enabled)) 244 return; 245 246 /* cheap hack to support suspend/resume */ 247 /* if cpu0 is not active neither should the other cpus */ 248 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) 249 return; 250 251 switch (nmi_watchdog) { 252 case NMI_LOCAL_APIC: 253 __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ 254 if (lapic_watchdog_init(nmi_hz) < 0) { 255 __get_cpu_var(wd_enabled) = 0; 256 return; 257 } 258 /* FALL THROUGH */ 259 case NMI_IO_APIC: 260 __get_cpu_var(wd_enabled) = 1; 261 atomic_inc(&nmi_active); 262 } 263} 264 265void stop_apic_nmi_watchdog(void *unused) 266{ 267 /* only support LOCAL and IO APICs for now */ 268 if ((nmi_watchdog != NMI_LOCAL_APIC) && 269 (nmi_watchdog != NMI_IO_APIC)) 270 return; 271 if (__get_cpu_var(wd_enabled) == 0) 272 return; 273 if (nmi_watchdog == NMI_LOCAL_APIC) 274 lapic_watchdog_stop(); 275 __get_cpu_var(wd_enabled) = 0; 276 atomic_dec(&nmi_active); 277} 278 279/* 280 * the best way to detect whether a CPU has a 'hard lockup' problem 281 * is to check it's local APIC timer IRQ counts. If they are not 282 * changing then that CPU has some problem. 283 * 284 * as these watchdog NMI IRQs are generated on every CPU, we only 285 * have to check the current processor. 286 * 287 * since NMIs don't listen to _any_ locks, we have to be extremely 288 * careful not to rely on unsafe variables. The printk might lock 289 * up though, so we have to break up any console locks first ... 290 * [when there will be more tty-related locks, break them up 291 * here too!] 292 */ 293 294static unsigned int 295 last_irq_sums [NR_CPUS], 296 alert_counter [NR_CPUS]; 297 298void touch_nmi_watchdog (void) 299{ 300 if (nmi_watchdog > 0) { 301 unsigned cpu; 302 303 /* 304 * Just reset the alert counters, (other CPUs might be 305 * spinning on locks we hold): 306 */ 307 for_each_present_cpu (cpu) 308 alert_counter[cpu] = 0; 309 } 310 311 /* 312 * Tickle the softlockup detector too: 313 */ 314 touch_softlockup_watchdog(); 315} 316EXPORT_SYMBOL(touch_nmi_watchdog); 317 318extern void die_nmi(struct pt_regs *, const char *msg); 319 320__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) 321{ 322 323 /* 324 * Since current_thread_info()-> is always on the stack, and we 325 * always switch the stack NMI-atomically, it's safe to use 326 * smp_processor_id(). 327 */ 328 unsigned int sum; 329 int touched = 0; 330 int cpu = smp_processor_id(); 331 int rc=0; 332 333 /* check for other users first */ 334 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 335 == NOTIFY_STOP) { 336 rc = 1; 337 touched = 1; 338 } 339 340 if (cpu_isset(cpu, backtrace_mask)) { 341 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 342 343 spin_lock(&lock); 344 printk("NMI backtrace for cpu %d\n", cpu); 345 dump_stack(); 346 spin_unlock(&lock); 347 cpu_clear(cpu, backtrace_mask); 348 } 349 350 /* 351 * Take the local apic timer and PIT/HPET into account. We don't 352 * know which one is active, when we have highres/dyntick on 353 */ 354 sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); 355 356 /* if the none of the timers isn't firing, this cpu isn't doing much */ 357 if (!touched && last_irq_sums[cpu] == sum) { 358 /* 359 * Ayiee, looks like this CPU is stuck ... 360 * wait a few IRQs (5 seconds) before doing the oops ... 361 */ 362 alert_counter[cpu]++; 363 if (alert_counter[cpu] == 5*nmi_hz) 364 /* 365 * die_nmi will return ONLY if NOTIFY_STOP happens.. 366 */ 367 die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); 368 } else { 369 last_irq_sums[cpu] = sum; 370 alert_counter[cpu] = 0; 371 } 372 /* see if the nmi watchdog went off */ 373 if (!__get_cpu_var(wd_enabled)) 374 return rc; 375 switch (nmi_watchdog) { 376 case NMI_LOCAL_APIC: 377 rc |= lapic_wd_event(nmi_hz); 378 break; 379 case NMI_IO_APIC: 380 /* don't know how to accurately check for this. 381 * just assume it was a watchdog timer interrupt 382 * This matches the old behaviour. 383 */ 384 rc = 1; 385 break; 386 } 387 return rc; 388} 389 390int do_nmi_callback(struct pt_regs * regs, int cpu) 391{ 392#ifdef CONFIG_SYSCTL 393 if (unknown_nmi_panic) 394 return unknown_nmi_panic_callback(regs, cpu); 395#endif 396 return 0; 397} 398 399#ifdef CONFIG_SYSCTL 400 401static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) 402{ 403 unsigned char reason = get_nmi_reason(); 404 char buf[64]; 405 406 sprintf(buf, "NMI received for unknown reason %02x\n", reason); 407 die_nmi(regs, buf); 408 return 0; 409} 410 411/* 412 * proc handler for /proc/sys/kernel/nmi 413 */ 414int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, 415 void __user *buffer, size_t *length, loff_t *ppos) 416{ 417 int old_state; 418 419 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; 420 old_state = nmi_watchdog_enabled; 421 proc_dointvec(table, write, file, buffer, length, ppos); 422 if (!!old_state == !!nmi_watchdog_enabled) 423 return 0; 424 425 if (atomic_read(&nmi_active) < 0) { 426 printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); 427 return -EIO; 428 } 429 430 if (nmi_watchdog == NMI_DEFAULT) { 431 if (lapic_watchdog_ok()) 432 nmi_watchdog = NMI_LOCAL_APIC; 433 else 434 nmi_watchdog = NMI_IO_APIC; 435 } 436 437 if (nmi_watchdog == NMI_LOCAL_APIC) { 438 if (nmi_watchdog_enabled) 439 enable_lapic_nmi_watchdog(); 440 else 441 disable_lapic_nmi_watchdog(); 442 } else { 443 printk( KERN_WARNING 444 "NMI watchdog doesn't know what hardware to touch\n"); 445 return -EIO; 446 } 447 return 0; 448} 449 450#endif 451 452void __trigger_all_cpu_backtrace(void) 453{ 454 int i; 455 456 backtrace_mask = cpu_online_map; 457 /* Wait for up to 10 seconds for all CPUs to do the backtrace */ 458 for (i = 0; i < 10 * 1000; i++) { 459 if (cpus_empty(backtrace_mask)) 460 break; 461 mdelay(1); 462 } 463} 464 465EXPORT_SYMBOL(nmi_active); 466EXPORT_SYMBOL(nmi_watchdog); 467