1/* 2 * Copyright (c) 2007-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <kern/kalloc.h> 30#include <mach/mach_time.h> 31#include <i386/cpu_data.h> 32#include <i386/cpuid.h> 33#include <i386/cpu_topology.h> 34#include <i386/cpu_threads.h> 35#include <i386/lapic.h> 36#include <i386/machine_cpu.h> 37#include <i386/machine_check.h> 38#include <i386/proc_reg.h> 39 40/* 41 * At the time of the machine-check exception, all hardware-threads panic. 42 * Each thread saves the state of its MCA registers to its per-cpu data area. 43 * 44 * State reporting is serialized so one thread dumps all valid state for all 45 * threads to the panic log. This may entail spinning waiting for other 46 * threads to complete saving state to memory. A timeout applies to this wait 47 * -- in particular, a 3-strikes timeout may prevent a thread from taking 48 * part is the affair. 49 */ 50 51#define IF(bool,str) ((bool) ? (str) : "") 52 53static boolean_t mca_initialized = FALSE; 54static boolean_t mca_MCE_present = FALSE; 55static boolean_t mca_MCA_present = FALSE; 56static uint32_t mca_family = 0; 57static unsigned int mca_error_bank_count = 0; 58static boolean_t mca_control_MSR_present = FALSE; 59static boolean_t mca_cmci_present = FALSE; 60static ia32_mcg_cap_t ia32_mcg_cap; 61decl_simple_lock_data(static, mca_lock); 62 63typedef struct { 64 ia32_mci_ctl_t mca_mci_ctl; 65 ia32_mci_status_t mca_mci_status; 66 ia32_mci_misc_t mca_mci_misc; 67 ia32_mci_addr_t mca_mci_addr; 68} mca_mci_bank_t; 69 70typedef struct mca_state { 71 boolean_t mca_is_saved; 72 boolean_t mca_is_valid; /* some state is valid */ 73 ia32_mcg_ctl_t mca_mcg_ctl; 74 ia32_mcg_status_t mca_mcg_status; 75 mca_mci_bank_t mca_error_bank[0]; 76} mca_state_t; 77 78typedef enum { 79 CLEAR, 80 DUMPING, 81 DUMPED 82} mca_dump_state_t; 83static volatile mca_dump_state_t mca_dump_state = CLEAR; 84 85static void 86mca_get_availability(void) 87{ 88 uint64_t features = cpuid_info()->cpuid_features; 89 uint32_t family = cpuid_info()->cpuid_family; 90 uint32_t model = cpuid_info()->cpuid_model; 91 uint32_t stepping = cpuid_info()->cpuid_stepping; 92 93 if ((model == CPUID_MODEL_HASWELL && stepping < 3) || 94 (model == CPUID_MODEL_HASWELL_ULT && stepping < 1) || 95 (model == CPUID_MODEL_CRYSTALWELL && stepping < 1)) 96 panic("Haswell pre-C0 steppings are not supported"); 97 98 mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0; 99 mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0; 100 mca_family = family; 101 102 /* 103 * If MCA, the number of banks etc is reported by the IA32_MCG_CAP MSR. 104 */ 105 if (mca_MCA_present) { 106 ia32_mcg_cap.u64 = rdmsr64(IA32_MCG_CAP); 107 mca_error_bank_count = ia32_mcg_cap.bits.count; 108 mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p; 109 mca_cmci_present = ia32_mcg_cap.bits.mcg_ext_corr_err_p; 110 } 111} 112 113void 114mca_cpu_init(void) 115{ 116 unsigned int i; 117 118 /* 119 * The first (boot) processor is responsible for discovering the 120 * machine check architecture present on this machine. 121 */ 122 if (!mca_initialized) { 123 mca_get_availability(); 124 mca_initialized = TRUE; 125 simple_lock_init(&mca_lock, 0); 126 } 127 128 if (mca_MCA_present) { 129 130 /* Enable all MCA features */ 131 if (mca_control_MSR_present) 132 wrmsr64(IA32_MCG_CTL, IA32_MCG_CTL_ENABLE); 133 134 switch (mca_family) { 135 case 0x06: 136 /* Enable all but mc0 */ 137 for (i = 1; i < mca_error_bank_count; i++) 138 wrmsr64(IA32_MCi_CTL(i),0xFFFFFFFFFFFFFFFFULL); 139 140 /* Clear all errors */ 141 for (i = 0; i < mca_error_bank_count; i++) 142 wrmsr64(IA32_MCi_STATUS(i), 0ULL); 143 break; 144 case 0x0F: 145 /* Enable all banks */ 146 for (i = 0; i < mca_error_bank_count; i++) 147 wrmsr64(IA32_MCi_CTL(i),0xFFFFFFFFFFFFFFFFULL); 148 149 /* Clear all errors */ 150 for (i = 0; i < mca_error_bank_count; i++) 151 wrmsr64(IA32_MCi_STATUS(i), 0ULL); 152 break; 153 } 154 } 155 156 /* Enable machine check exception handling if available */ 157 if (mca_MCE_present) { 158 set_cr4(get_cr4()|CR4_MCE); 159 } 160} 161 162boolean_t 163mca_is_cmci_present(void) 164{ 165 if (!mca_initialized) 166 mca_cpu_init(); 167 return mca_cmci_present; 168} 169 170void 171mca_cpu_alloc(cpu_data_t *cdp) 172{ 173 vm_size_t mca_state_size; 174 175 /* 176 * Allocate space for an array of error banks. 177 */ 178 mca_state_size = sizeof(mca_state_t) + 179 sizeof(mca_mci_bank_t) * mca_error_bank_count; 180 cdp->cpu_mca_state = kalloc(mca_state_size); 181 if (cdp->cpu_mca_state == NULL) { 182 printf("mca_cpu_alloc() failed for cpu %d\n", cdp->cpu_number); 183 return; 184 } 185 bzero((void *) cdp->cpu_mca_state, mca_state_size); 186 187 /* 188 * If the boot processor is yet have its allocation made, 189 * do this now. 190 */ 191 if (cpu_datap(master_cpu)->cpu_mca_state == NULL) 192 mca_cpu_alloc(cpu_datap(master_cpu)); 193} 194 195static void 196mca_save_state(mca_state_t *mca_state) 197{ 198 mca_mci_bank_t *bank; 199 unsigned int i; 200 201 assert(!ml_get_interrupts_enabled() || get_preemption_level() > 0); 202 203 if (mca_state == NULL) 204 return; 205 206 mca_state->mca_mcg_ctl = mca_control_MSR_present ? 207 rdmsr64(IA32_MCG_CTL) : 0ULL; 208 mca_state->mca_mcg_status.u64 = rdmsr64(IA32_MCG_STATUS); 209 210 bank = (mca_mci_bank_t *) &mca_state->mca_error_bank[0]; 211 for (i = 0; i < mca_error_bank_count; i++, bank++) { 212 bank->mca_mci_ctl = rdmsr64(IA32_MCi_CTL(i)); 213 bank->mca_mci_status.u64 = rdmsr64(IA32_MCi_STATUS(i)); 214 if (!bank->mca_mci_status.bits.val) 215 continue; 216 bank->mca_mci_misc = (bank->mca_mci_status.bits.miscv)? 217 rdmsr64(IA32_MCi_MISC(i)) : 0ULL; 218 bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv)? 219 rdmsr64(IA32_MCi_ADDR(i)) : 0ULL; 220 mca_state->mca_is_valid = TRUE; 221 } 222 223 /* 224 * If we're the first thread with MCA state, point our package to it 225 * and don't care about races 226 */ 227 if (x86_package()->mca_state == NULL) 228 x86_package()->mca_state = mca_state; 229 230 mca_state->mca_is_saved = TRUE; 231} 232 233void 234mca_check_save(void) 235{ 236 if (mca_dump_state > CLEAR) 237 mca_save_state(current_cpu_datap()->cpu_mca_state); 238} 239 240static void 241mca_report_cpu_info(void) 242{ 243 i386_cpu_info_t *infop = cpuid_info(); 244 245 kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n", 246 infop->cpuid_family, 247 infop->cpuid_model, 248 infop->cpuid_stepping, 249 infop->cpuid_microcode_version); 250 kdb_printf(" signature: 0x%x\n", 251 infop->cpuid_signature); 252 kdb_printf(" %s\n", 253 infop->cpuid_brand_string); 254 255} 256 257static void 258mca_dump_bank(mca_state_t *state, int i) 259{ 260 mca_mci_bank_t *bank; 261 ia32_mci_status_t status; 262 263 bank = &state->mca_error_bank[i]; 264 status = bank->mca_mci_status; 265 if (!status.bits.val) 266 return; 267 268 kdb_printf(" IA32_MC%d_STATUS(0x%x): 0x%016qx\n", 269 i, IA32_MCi_STATUS(i), status.u64); 270 271 if (status.bits.addrv) 272 kdb_printf(" IA32_MC%d_ADDR(0x%x): 0x%016qx\n", 273 i, IA32_MCi_ADDR(i), bank->mca_mci_addr); 274 275 if (status.bits.miscv) 276 kdb_printf(" IA32_MC%d_MISC(0x%x): 0x%016qx\n", 277 i, IA32_MCi_MISC(i), bank->mca_mci_misc); 278} 279 280static void 281mca_cpu_dump_error_banks(mca_state_t *state) 282{ 283 unsigned int i; 284 285 if (!state->mca_is_valid) 286 return; 287 288 for (i = 0; i < mca_error_bank_count; i++ ) { 289 mca_dump_bank(state, i); 290 } 291} 292 293void 294mca_dump(void) 295{ 296 mca_state_t *mca_state = current_cpu_datap()->cpu_mca_state; 297 uint64_t deadline; 298 unsigned int i = 0; 299 300 /* 301 * Capture local MCA registers to per-cpu data. 302 */ 303 mca_save_state(mca_state); 304 305 /* 306 * Serialize: the first caller controls dumping MCA registers, 307 * other threads spin meantime. 308 */ 309 simple_lock(&mca_lock); 310 if (mca_dump_state > CLEAR) { 311 simple_unlock(&mca_lock); 312 while (mca_dump_state == DUMPING) 313 cpu_pause(); 314 return; 315 } 316 mca_dump_state = DUMPING; 317 simple_unlock(&mca_lock); 318 319 /* 320 * Wait for all other hardware threads to save their state. 321 * Or timeout. 322 */ 323 deadline = mach_absolute_time() + LockTimeOut; 324 while (mach_absolute_time() < deadline && i < real_ncpus) { 325 if (!cpu_datap(i)->cpu_mca_state->mca_is_saved) { 326 cpu_pause(); 327 continue; 328 } 329 i += 1; 330 } 331 332 /* 333 * Report machine-check capabilities: 334 */ 335 kdb_printf("Machine-check capabilities: 0x%016qx\n", ia32_mcg_cap.u64); 336 337 mca_report_cpu_info(); 338 339 kdb_printf(" %d error-reporting banks\n", mca_error_bank_count); 340 341 /* 342 * Dump all processor state: 343 */ 344 for (i = 0; i < real_ncpus; i++) { 345 mca_state_t *mcsp = cpu_datap(i)->cpu_mca_state; 346 ia32_mcg_status_t status; 347 348 if (mcsp == NULL || 349 mcsp->mca_is_saved == FALSE || 350 mcsp->mca_mcg_status.u64 == 0 || 351 !mcsp->mca_is_valid) { 352 continue; 353 } 354 status = mcsp->mca_mcg_status; 355 kdb_printf("Processor %d: IA32_MCG_STATUS: 0x%016qx\n", 356 i, status.u64); 357 mca_cpu_dump_error_banks(mcsp); 358 } 359 360 /* Update state to release any other threads. */ 361 mca_dump_state = DUMPED; 362} 363 364 365extern void mca_exception_panic(void); 366extern void lapic_trigger_MC(void); 367void mca_exception_panic(void) 368{ 369#if DEBUG 370 lapic_trigger_MC(); 371#else 372 kprintf("mca_exception_panic() requires DEBUG build\n"); 373#endif 374} 375