/*
 * P4 specific Machine Check Exception Reporting
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/smp.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include <asm/therm_throt.h>

#include "mce.h"

/*
 * Snapshot of the extended machine-check state MSRs
 * (MSR_IA32_MCG_EAX .. MSR_IA32_MCG_EIP), as supported by the
 * P4/Xeon family.  One field per MSR, filled by
 * intel_get_extended_msrs() below.
 */
struct intel_mce_extended_msrs {
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
	u32 esi;
	u32 edi;
	u32 ebp;
	u32 esp;
	u32 eflags;
	u32 eip;
	/* u32 *reserved[]; */
};

/*
 * Number of extended MCE MSRs reported by MCG_CAP at init time;
 * zero means the extended register dump is not available.
 */
static int mce_num_extended_msrs = 0;


#ifdef CONFIG_X86_MCE_P4THERMAL
/*
 * Default thermal-vector handler: fires only if we get a thermal LVT
 * interrupt before (or without) intel_init_thermal() installing the
 * real handler.  Log it and taint, since it should not happen.
 */
static void unexpected_thermal_interrupt(struct pt_regs *regs)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
			smp_processor_id());
	add_taint(TAINT_MACHINE_CHECK);
}

/* P4/Xeon Thermal transition interrupt handler */
static void intel_thermal_interrupt(struct pt_regs *regs)
{
	__u64 msr_val;

	ack_APIC_irq();

	/* Bit 0 of IA32_THERM_STATUS is the current thermal-throttle state;
	 * hand it to the generic throttle-event accounting. */
	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
	therm_throt_process(msr_val & 0x1);
}

/* Thermal interrupt handler for this CPU setup */
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;

/*
 * Entry point for the APIC thermal vector (called from the low-level
 * interrupt stub); dispatches to whichever handler is installed.
 */
fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
	irq_enter();
	vendor_thermal_interrupt(regs);
	irq_exit();
}

/*
 * P4/Xeon Thermal regulation detect and init.
 *
 * Probes for the thermal monitor (ACPI + ACC feature flags), bails out
 * if firmware/SMM already owns the thermal LVT, otherwise installs
 * intel_thermal_interrupt on THERMAL_APIC_VECTOR, enables the thermal
 * interrupt sources, and finally unmasks the LVT entry.  Runs on the
 * CPU being initialized (uses smp_processor_id()).
 */
static void intel_init_thermal(struct cpuinfo_x86 *c)
{
	u32 l, h;
	unsigned int cpu = smp_processor_id();

	/* Thermal monitoring */
	if (!cpu_has(c, X86_FEATURE_ACPI))
		return;	/* -ENODEV */

	/* Clock modulation */
	if (!cpu_has(c, X86_FEATURE_ACC))
		return;	/* -ENODEV */

	/* first check if its enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already -zwanem.
	 */
	/* MISC_ENABLE bit 3 = thermal monitor enable; if set and the LVT
	 * entry is routed to SMI, firmware owns thermal handling. */
	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
	h = apic_read(APIC_LVTTHMR);
	if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
				cpu);
		return; /* -EBUSY */
	}

	/* check whether a vector already exists, temporarily masked? */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
				"installed\n",
			cpu, (h & APIC_VECTOR_MASK));
		return; /* -EBUSY */
	}

	/* The temperature transition interrupt handler setup */
	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
	apic_write_around(APIC_LVTTHMR, h);

	/* Enable both thermal interrupt sources (low two bits of
	 * IA32_THERM_INTERRUPT: high/low temperature threshold). */
	rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
	wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);

	/* ok we're good to go... */
	/* Install the real handler before unmasking the LVT entry below,
	 * so an interrupt can never hit the "unexpected" stub. */
	vendor_thermal_interrupt = intel_thermal_interrupt;

	/* Turn on the thermal monitor itself (MISC_ENABLE bit 3). */
	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);

	/* Finally unmask the thermal LVT entry. */
	l = apic_read (APIC_LVTTHMR);
	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
	return;
}
#endif /* CONFIG_X86_MCE_P4THERMAL */


/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
/* Only valid when MCG_CAP advertised MCG_EXT_P (see init below);
 * the high 32 bits of each MSR are read and discarded. */
static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
	u32 h;

	rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
	rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
	rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
	rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
	rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
	rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
	rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
	rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
	rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
}

/*
 * The #MC exception handler proper (installed via machine_check_vector
 * in intel_p4_mcheck_init).  Dumps MCG_STATUS, the extended register
 * snapshot if available, and every valid bank; panics if the error is
 * fatal, otherwise clears the logged banks and continues.
 *
 * "recover" is a bitmask: bit 0 set means we cannot safely continue,
 * bit 1 set means processor context is corrupt.
 */
static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
{
	int recover=1;
	u32 alow, ahigh, high, low;
	u32 mcgstl, mcgsth;
	int i;

	/* MCG_STATUS bit 0 (RIPV): restart IP valid => recoverable. */
	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
	if (mcgstl & (1<<0))	/* Recoverable ? */
		recover=0;

	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
		smp_processor_id(), mcgsth, mcgstl);

	if (mce_num_extended_msrs > 0) {
		struct intel_mce_extended_msrs dbg;
		intel_get_extended_msrs(&dbg);
		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
			smp_processor_id(), dbg.eip, dbg.eflags);
		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
		printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
	}

	/* Scan all banks.  Bit numbers below index the high half of the
	 * 64-bit MCi_STATUS: 31=VAL, 29=UC (uncorrected), 27=MISCV,
	 * 26=ADDRV, 25=PCC (processor context corrupt). */
	for (i=0; i<nr_mce_banks; i++) {
		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
		if (high & (1<<31)) {
			if (high & (1<<29))
				recover |= 1;
			if (high & (1<<25))
				recover |= 2;
			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
			high &= ~(1<<31);
			if (high & (1<<27)) {
				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
				printk ("[%08x%08x]", ahigh, alow);
			}
			if (high & (1<<26)) {
				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
				printk (" at %08x%08x", ahigh, alow);
			}
			printk ("\n");
		}
	}

	if (recover & 2)
		panic ("CPU context corrupt");
	if (recover & 1)
		panic ("Unable to continue");

	printk(KERN_EMERG "Attempting to continue.\n");
	/*
	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
	 * recoverable/continuable.This will allow BIOS to look at the MSRs
	 * for errors if the OS could not log the error.
	 */
	for (i=0; i<nr_mce_banks; i++) {
		u32 msr;
		msr = MSR_IA32_MC0_STATUS+i*4;
		rdmsr (msr, low, high);
		if (high&(1<<31)) {
			/* Clear it */
			wrmsr(msr, 0UL, 0UL);
			/* Serialize */
			wmb();
			add_taint(TAINT_MACHINE_CHECK);
		}
	}
	/* Clear MCIP (MCG_STATUS bit 2) last, to re-arm #MC delivery. */
	mcgstl &= ~(1<<2);
	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
}


/*
 * Per-CPU init for P4/Xeon machine-check reporting: install the #MC
 * handler, enable all error-reporting banks, set CR4.MCE, and probe
 * for the extended MSR dump and the thermal monitor.
 */
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int i;

	/* Install handler before enabling MCE so a fault taken during
	 * setup reaches our code; wmb() orders the store. */
	machine_check_vector = intel_machine_check;
	wmb();

	printk (KERN_INFO "Intel machine check architecture supported.\n");
	rdmsr (MSR_IA32_MCG_CAP, l, h);
	if (l & (1<<8))	/* Control register present ? */
		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
	nr_mce_banks = l & 0xff;	/* MCG_CAP[7:0] = bank count */

	/* Enable every error source in every bank and clear stale status. */
	for (i=0; i<nr_mce_banks; i++) {
		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
	}

	set_in_cr4 (X86_CR4_MCE);
	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
		smp_processor_id());

	/* Check for P4/Xeon extended MCE MSRs */
	rdmsr (MSR_IA32_MCG_CAP, l, h);
	if (l & (1<<9))	{/* MCG_EXT_P */
		/* MCG_CAP[23:16] = number of extended MSRs */
		mce_num_extended_msrs = (l >> 16) & 0xff;
		printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
				" available\n",
			smp_processor_id(), mce_num_extended_msrs);

#ifdef CONFIG_X86_MCE_P4THERMAL
		/* Check for P4/Xeon Thermal monitor */
		intel_init_thermal(c);
#endif
	}
}