1/* 2 * Machine check injection support. 3 * Copyright 2008 Intel Corporation. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License 7 * as published by the Free Software Foundation; version 2 8 * of the License. 9 * 10 * Authors: 11 * Andi Kleen 12 * Ying Huang 13 */ 14#include <linux/uaccess.h> 15#include <linux/module.h> 16#include <linux/timer.h> 17#include <linux/kernel.h> 18#include <linux/string.h> 19#include <linux/fs.h> 20#include <linux/smp.h> 21#include <linux/notifier.h> 22#include <linux/kdebug.h> 23#include <linux/cpu.h> 24#include <linux/sched.h> 25#include <linux/gfp.h> 26#include <asm/mce.h> 27#include <asm/apic.h> 28 29/* Update fake mce registers on current CPU. */ 30static void inject_mce(struct mce *m) 31{ 32 struct mce *i = &per_cpu(injectm, m->extcpu); 33 34 /* Make sure noone reads partially written injectm */ 35 i->finished = 0; 36 mb(); 37 m->finished = 0; 38 /* First set the fields after finished */ 39 i->extcpu = m->extcpu; 40 mb(); 41 /* Now write record in order, finished last (except above) */ 42 memcpy(i, m, sizeof(struct mce)); 43 /* Finally activate it */ 44 mb(); 45 i->finished = 1; 46} 47 48static void raise_poll(struct mce *m) 49{ 50 unsigned long flags; 51 mce_banks_t b; 52 53 memset(&b, 0xff, sizeof(mce_banks_t)); 54 local_irq_save(flags); 55 machine_check_poll(0, &b); 56 local_irq_restore(flags); 57 m->finished = 0; 58} 59 60static void raise_exception(struct mce *m, struct pt_regs *pregs) 61{ 62 struct pt_regs regs; 63 unsigned long flags; 64 65 if (!pregs) { 66 memset(®s, 0, sizeof(struct pt_regs)); 67 regs.ip = m->ip; 68 regs.cs = m->cs; 69 pregs = ®s; 70 } 71 /* in mcheck exeception handler, irq will be disabled */ 72 local_irq_save(flags); 73 do_machine_check(pregs, 0); 74 local_irq_restore(flags); 75 m->finished = 0; 76} 77 78static cpumask_var_t mce_inject_cpumask; 79 80static int mce_raise_notify(struct notifier_block *self, 81 unsigned long val, void *data) 82{ 83 struct die_args *args = (struct die_args *)data; 84 int cpu = smp_processor_id(); 85 struct mce *m = &__get_cpu_var(injectm); 86 if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) 87 return NOTIFY_DONE; 88 cpumask_clear_cpu(cpu, mce_inject_cpumask); 89 if (m->inject_flags & MCJ_EXCEPTION) 90 raise_exception(m, args->regs); 91 else if (m->status) 92 raise_poll(m); 93 return NOTIFY_STOP; 94} 95 96static struct notifier_block mce_raise_nb = { 97 .notifier_call = mce_raise_notify, 98 .priority = 1000, 99}; 100 101/* Inject mce on current CPU */ 102static int raise_local(void) 103{ 104 struct mce *m = &__get_cpu_var(injectm); 105 int context = MCJ_CTX(m->inject_flags); 106 int ret = 0; 107 int cpu = m->extcpu; 108 109 if (m->inject_flags & MCJ_EXCEPTION) { 110 printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); 111 switch (context) { 112 case MCJ_CTX_IRQ: 113 /* 114 * Could do more to fake interrupts like 115 * calling irq_enter, but the necessary 116 * machinery isn't exported currently. 117 */ 118 /*FALL THROUGH*/ 119 case MCJ_CTX_PROCESS: 120 raise_exception(m, NULL); 121 break; 122 default: 123 printk(KERN_INFO "Invalid MCE context\n"); 124 ret = -EINVAL; 125 } 126 printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); 127 } else if (m->status) { 128 printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); 129 raise_poll(m); 130 mce_notify_irq(); 131 printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); 132 } else 133 m->finished = 0; 134 135 return ret; 136} 137 138static void raise_mce(struct mce *m) 139{ 140 int context = MCJ_CTX(m->inject_flags); 141 142 inject_mce(m); 143 144 if (context == MCJ_CTX_RANDOM) 145 return; 146 147#ifdef CONFIG_X86_LOCAL_APIC 148 if (m->inject_flags & MCJ_NMI_BROADCAST) { 149 unsigned long start; 150 int cpu; 151 get_online_cpus(); 152 cpumask_copy(mce_inject_cpumask, cpu_online_mask); 153 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); 154 for_each_online_cpu(cpu) { 155 struct mce *mcpu = &per_cpu(injectm, cpu); 156 if (!mcpu->finished || 157 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) 158 cpumask_clear_cpu(cpu, mce_inject_cpumask); 159 } 160 if (!cpumask_empty(mce_inject_cpumask)) 161 apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); 162 start = jiffies; 163 while (!cpumask_empty(mce_inject_cpumask)) { 164 if (!time_before(jiffies, start + 2*HZ)) { 165 printk(KERN_ERR 166 "Timeout waiting for mce inject NMI %lx\n", 167 *cpumask_bits(mce_inject_cpumask)); 168 break; 169 } 170 cpu_relax(); 171 } 172 raise_local(); 173 put_cpu(); 174 put_online_cpus(); 175 } else 176#endif 177 raise_local(); 178} 179 180/* Error injection interface */ 181static ssize_t mce_write(struct file *filp, const char __user *ubuf, 182 size_t usize, loff_t *off) 183{ 184 struct mce m; 185 186 if (!capable(CAP_SYS_ADMIN)) 187 return -EPERM; 188 /* 189 * There are some cases where real MSR reads could slip 190 * through. 191 */ 192 if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) 193 return -EIO; 194 195 if ((unsigned long)usize > sizeof(struct mce)) 196 usize = sizeof(struct mce); 197 if (copy_from_user(&m, ubuf, usize)) 198 return -EFAULT; 199 200 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) 201 return -EINVAL; 202 203 /* 204 * Need to give user space some time to set everything up, 205 * so do it a jiffie or two later everywhere. 206 */ 207 schedule_timeout(2); 208 raise_mce(&m); 209 return usize; 210} 211 212static int inject_init(void) 213{ 214 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) 215 return -ENOMEM; 216 printk(KERN_INFO "Machine check injector initialized\n"); 217 mce_chrdev_ops.write = mce_write; 218 register_die_notifier(&mce_raise_nb); 219 return 0; 220} 221 222module_init(inject_init); 223/* 224 * Cannot tolerate unloading currently because we cannot 225 * guarantee all openers of mce_chrdev will get a reference to us. 226 */ 227MODULE_LICENSE("GPL"); 228