1/* 2 * Copyright (c) 2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <kern/kalloc.h> 30#include <mach/mach_time.h> 31#include <i386/cpu_data.h> 32#include <i386/cpuid.h> 33#include <i386/cpu_topology.h> 34#include <i386/cpu_threads.h> 35#include <i386/machine_cpu.h> 36#include <i386/machine_check.h> 37#include <i386/proc_reg.h> 38 39#define IF(bool,str) ((bool) ? (str) : "") 40 41static boolean_t mca_initialized = FALSE; 42static boolean_t mca_MCE_present = FALSE; 43static boolean_t mca_MCA_present = FALSE; 44static uint32_t mca_family = 0; 45static unsigned int mca_error_bank_count = 0; 46static boolean_t mca_control_MSR_present = FALSE; 47static boolean_t mca_threshold_status_present = FALSE; 48static boolean_t mca_extended_MSRs_present = FALSE; 49static unsigned int mca_extended_MSRs_count = 0; 50static boolean_t mca_cmci_present = FALSE; 51static ia32_mcg_cap_t ia32_mcg_cap; 52decl_simple_lock_data(static, mca_lock); 53 54typedef struct { 55 ia32_mci_ctl_t mca_mci_ctl; 56 ia32_mci_status_t mca_mci_status; 57 ia32_mci_misc_t mca_mci_misc; 58 ia32_mci_addr_t mca_mci_addr; 59} mca_mci_bank_t; 60 61typedef struct mca_state { 62 ia32_mcg_ctl_t mca_mcg_ctl; 63 ia32_mcg_status_t mca_mcg_status; 64 mca_mci_bank_t mca_error_bank[0]; 65} mca_state_t; 66 67typedef enum { 68 CLEAR, 69 DUMPING, 70 DUMPED 71} mca_dump_state_t; 72static volatile mca_dump_state_t mca_dump_state = CLEAR; 73 74static void 75mca_get_availability(void) 76{ 77 uint64_t features = cpuid_info()->cpuid_features; 78 uint32_t family = cpuid_info()->cpuid_family; 79 80 mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0; 81 mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0; 82 mca_family = family; 83 84 /* 85 * If MCA, the number of banks etc is reported by the IA32_MCG_CAP MSR. 86 */ 87 if (mca_MCA_present) { 88 ia32_mcg_cap.u64 = rdmsr64(IA32_MCG_CAP); 89 mca_error_bank_count = ia32_mcg_cap.bits.count; 90 mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p; 91 mca_threshold_status_present = ia32_mcg_cap.bits.mcg_tes_p; 92 mca_cmci_present = ia32_mcg_cap.bits.mcg_ext_corr_err_p; 93 if (family == 0x0F) { 94 mca_extended_MSRs_present = ia32_mcg_cap.bits.mcg_ext_p; 95 mca_extended_MSRs_count = ia32_mcg_cap.bits.mcg_ext_cnt; 96 } 97 } 98} 99 100void 101mca_cpu_init(void) 102{ 103 unsigned int i; 104 105 /* 106 * The first (boot) processor is responsible for discovering the 107 * machine check architecture present on this machine. 108 */ 109 if (!mca_initialized) { 110 mca_get_availability(); 111 mca_initialized = TRUE; 112 simple_lock_init(&mca_lock, 0); 113 } 114 115 if (mca_MCA_present) { 116 117 /* Enable all MCA features */ 118 if (mca_control_MSR_present) 119 wrmsr64(IA32_MCG_CTL, IA32_MCG_CTL_ENABLE); 120 121 switch (mca_family) { 122 case 0x06: 123 /* Enable all but mc0 */ 124 for (i = 1; i < mca_error_bank_count; i++) 125 wrmsr64(IA32_MCi_CTL(i),0xFFFFFFFFFFFFFFFFULL); 126 127 /* Clear all errors */ 128 for (i = 0; i < mca_error_bank_count; i++) 129 wrmsr64(IA32_MCi_STATUS(i), 0ULL); 130 break; 131 case 0x0F: 132 /* Enable all banks */ 133 for (i = 0; i < mca_error_bank_count; i++) 134 wrmsr64(IA32_MCi_CTL(i),0xFFFFFFFFFFFFFFFFULL); 135 136 /* Clear all errors */ 137 for (i = 0; i < mca_error_bank_count; i++) 138 wrmsr64(IA32_MCi_STATUS(i), 0ULL); 139 break; 140 } 141 } 142 143 /* Enable machine check exception handling if available */ 144 if (mca_MCE_present) { 145 set_cr4(get_cr4()|CR4_MCE); 146 } 147} 148 149boolean_t 150mca_is_cmci_present(void) 151{ 152 if (!mca_initialized) 153 mca_cpu_init(); 154 return mca_cmci_present; 155} 156 157void 158mca_cpu_alloc(cpu_data_t *cdp) 159{ 160 vm_size_t mca_state_size; 161 162 /* 163 * Allocate space for an array of error banks. 164 */ 165 mca_state_size = sizeof(mca_state_t) + 166 sizeof(mca_mci_bank_t) * mca_error_bank_count; 167 cdp->cpu_mca_state = kalloc(mca_state_size); 168 if (cdp->cpu_mca_state == NULL) { 169 printf("mca_cpu_alloc() failed for cpu %d\n", cdp->cpu_number); 170 return; 171 } 172 bzero((void *) cdp->cpu_mca_state, mca_state_size); 173 174 /* 175 * If the boot processor is yet have its allocation made, 176 * do this now. 177 */ 178 if (cpu_datap(master_cpu)->cpu_mca_state == NULL) 179 mca_cpu_alloc(cpu_datap(master_cpu)); 180} 181 182static void 183mca_save_state(mca_state_t *mca_state) 184{ 185 mca_mci_bank_t *bank; 186 unsigned int i; 187 188 assert(!ml_get_interrupts_enabled() || get_preemption_level() > 0); 189 190 if (mca_state == NULL) 191 return; 192 193 mca_state->mca_mcg_ctl = mca_control_MSR_present ? 194 rdmsr64(IA32_MCG_CTL) : 0ULL; 195 mca_state->mca_mcg_status.u64 = rdmsr64(IA32_MCG_STATUS); 196 197 bank = (mca_mci_bank_t *) &mca_state->mca_error_bank[0]; 198 for (i = 0; i < mca_error_bank_count; i++, bank++) { 199 bank->mca_mci_ctl = rdmsr64(IA32_MCi_CTL(i)); 200 bank->mca_mci_status.u64 = rdmsr64(IA32_MCi_STATUS(i)); 201 if (!bank->mca_mci_status.bits.val) 202 continue; 203 bank->mca_mci_misc = (bank->mca_mci_status.bits.miscv)? 204 rdmsr64(IA32_MCi_MISC(i)) : 0ULL; 205 bank->mca_mci_addr = (bank->mca_mci_status.bits.addrv)? 206 rdmsr64(IA32_MCi_ADDR(i)) : 0ULL; 207 } 208 209 /* 210 * If we're the first thread with MCA state, point our package to it 211 * and don't care about races 212 */ 213 if (x86_package()->mca_state == NULL) 214 x86_package()->mca_state = mca_state; 215} 216 217void 218mca_check_save(void) 219{ 220 if (mca_dump_state > CLEAR) 221 mca_save_state(current_cpu_datap()->cpu_mca_state); 222} 223 224static void mca_dump_64bit_state(void) 225{ 226 kdb_printf("Extended Machine Check State:\n"); 227 kdb_printf(" IA32_MCG_RAX: 0x%016qx\n", rdmsr64(IA32_MCG_RAX)); 228 kdb_printf(" IA32_MCG_RBX: 0x%016qx\n", rdmsr64(IA32_MCG_RBX)); 229 kdb_printf(" IA32_MCG_RCX: 0x%016qx\n", rdmsr64(IA32_MCG_RCX)); 230 kdb_printf(" IA32_MCG_RDX: 0x%016qx\n", rdmsr64(IA32_MCG_RDX)); 231 kdb_printf(" IA32_MCG_RSI: 0x%016qx\n", rdmsr64(IA32_MCG_RSI)); 232 kdb_printf(" IA32_MCG_RDI: 0x%016qx\n", rdmsr64(IA32_MCG_RDI)); 233 kdb_printf(" IA32_MCG_RBP: 0x%016qx\n", rdmsr64(IA32_MCG_RBP)); 234 kdb_printf(" IA32_MCG_RSP: 0x%016qx\n", rdmsr64(IA32_MCG_RSP)); 235 kdb_printf(" IA32_MCG_RFLAGS: 0x%016qx\n", rdmsr64(IA32_MCG_RFLAGS)); 236 kdb_printf(" IA32_MCG_RIP: 0x%016qx\n", rdmsr64(IA32_MCG_RIP)); 237 kdb_printf(" IA32_MCG_MISC: 0x%016qx\n", rdmsr64(IA32_MCG_MISC)); 238 kdb_printf(" IA32_MCG_R8: 0x%016qx\n", rdmsr64(IA32_MCG_R8)); 239 kdb_printf(" IA32_MCG_R9: 0x%016qx\n", rdmsr64(IA32_MCG_R9)); 240 kdb_printf(" IA32_MCG_R10: 0x%016qx\n", rdmsr64(IA32_MCG_R10)); 241 kdb_printf(" IA32_MCG_R11: 0x%016qx\n", rdmsr64(IA32_MCG_R11)); 242 kdb_printf(" IA32_MCG_R12: 0x%016qx\n", rdmsr64(IA32_MCG_R12)); 243 kdb_printf(" IA32_MCG_R13: 0x%016qx\n", rdmsr64(IA32_MCG_R13)); 244 kdb_printf(" IA32_MCG_R14: 0x%016qx\n", rdmsr64(IA32_MCG_R14)); 245 kdb_printf(" IA32_MCG_R15: 0x%016qx\n", rdmsr64(IA32_MCG_R15)); 246} 247 248static uint32_t rdmsr32(uint32_t msr) 249{ 250 return (uint32_t) rdmsr64(msr); 251} 252 253static void mca_dump_32bit_state(void) 254{ 255 kdb_printf("Extended Machine Check State:\n"); 256 kdb_printf(" IA32_MCG_EAX: 0x%08x\n", rdmsr32(IA32_MCG_EAX)); 257 kdb_printf(" IA32_MCG_EBX: 0x%08x\n", rdmsr32(IA32_MCG_EBX)); 258 kdb_printf(" IA32_MCG_ECX: 0x%08x\n", rdmsr32(IA32_MCG_ECX)); 259 kdb_printf(" IA32_MCG_EDX: 0x%08x\n", rdmsr32(IA32_MCG_EDX)); 260 kdb_printf(" IA32_MCG_ESI: 0x%08x\n", rdmsr32(IA32_MCG_ESI)); 261 kdb_printf(" IA32_MCG_EDI: 0x%08x\n", rdmsr32(IA32_MCG_EDI)); 262 kdb_printf(" IA32_MCG_EBP: 0x%08x\n", rdmsr32(IA32_MCG_EBP)); 263 kdb_printf(" IA32_MCG_ESP: 0x%08x\n", rdmsr32(IA32_MCG_ESP)); 264 kdb_printf(" IA32_MCG_EFLAGS: 0x%08x\n", rdmsr32(IA32_MCG_EFLAGS)); 265 kdb_printf(" IA32_MCG_EIP: 0x%08x\n", rdmsr32(IA32_MCG_EIP)); 266 kdb_printf(" IA32_MCG_MISC: 0x%08x\n", rdmsr32(IA32_MCG_MISC)); 267} 268 269static void 270mca_report_cpu_info(void) 271{ 272 uint64_t microcode; 273 i386_cpu_info_t *infop = cpuid_info(); 274 275 // microcode revision is top 32 bits of MSR_IA32_UCODE_REV 276 microcode = rdmsr64(MSR_IA32_UCODE_REV) >> 32; 277 kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n", 278 infop->cpuid_family, 279 infop->cpuid_model, 280 infop->cpuid_stepping, 281 (uint32_t) microcode); 282 kdb_printf(" %s\n", infop->cpuid_brand_string); 283} 284 285static const char *mc8_memory_operation[] = { 286 [MC8_MMM_GENERIC] "generic", 287 [MC8_MMM_READ] "read", 288 [MC8_MMM_WRITE] "write", 289 [MC8_MMM_ADDRESS_COMMAND] "address/command", 290 [MC8_MMM_RESERVED] "reserved" 291}; 292 293static void 294mca_dump_bank_mc8(mca_state_t *state, int i) 295{ 296 mca_mci_bank_t *bank; 297 ia32_mci_status_t status; 298 struct ia32_mc8_specific mc8; 299 int mmm; 300 301 bank = &state->mca_error_bank[i]; 302 status = bank->mca_mci_status; 303 mc8 = status.bits_mc8; 304 mmm = MIN(mc8.memory_operation, MC8_MMM_RESERVED); 305 306 kdb_printf( 307 " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n", 308 i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in")); 309 if (!status.bits.val) 310 return; 311 312 kdb_printf( 313 " Channel number: %d%s\n" 314 " Memory Operation: %s\n" 315 " Machine-specific error: %s%s%s%s%s%s%s%s\n" 316 " COR_ERR_CNT: %d\n", 317 mc8.channel_number, 318 IF(mc8.channel_number == 15, " (unknown)"), 319 mc8_memory_operation[mmm], 320 IF(mc8.read_ecc, "Read ECC"), 321 IF(mc8.ecc_on_a_scrub, "ECC on scrub"), 322 IF(mc8.write_parity, "Write parity"), 323 IF(mc8.redundant_memory, "Redundant memory"), 324 IF(mc8.sparing, "Sparing/Resilvering"), 325 IF(mc8.access_out_of_range, "Access out of Range"), 326 IF(mc8.address_parity, "Address Parity"), 327 IF(mc8.byte_enable_parity, "Byte Enable Parity"), 328 mc8.cor_err_cnt); 329 kdb_printf( 330 " Status bits:\n%s%s%s%s%s%s", 331 IF(status.bits.pcc, " Processor context corrupt\n"), 332 IF(status.bits.addrv, " ADDR register valid\n"), 333 IF(status.bits.miscv, " MISC register valid\n"), 334 IF(status.bits.en, " Error enabled\n"), 335 IF(status.bits.uc, " Uncorrected error\n"), 336 IF(status.bits.over, " Error overflow\n")); 337 if (status.bits.addrv) 338 kdb_printf( 339 " IA32_MC%d_ADDR(0x%x): 0x%016qx\n", 340 i, IA32_MCi_ADDR(i), bank->mca_mci_addr); 341 if (status.bits.miscv) { 342 ia32_mc8_misc_t mc8_misc; 343 344 mc8_misc.u64 = bank->mca_mci_misc; 345 kdb_printf( 346 " IA32_MC%d_MISC(0x%x): 0x%016qx\n" 347 " DIMM: %d\n" 348 " Channel: %d\n" 349 " Syndrome: 0x%x\n", 350 i, IA32_MCi_MISC(i), mc8_misc.u64, 351 mc8_misc.bits.dimm, 352 mc8_misc.bits.channel, 353 (int) mc8_misc.bits.syndrome); 354 } 355} 356 357static const char *mca_threshold_status[] = { 358 [THRESHOLD_STATUS_NO_TRACKING] "No tracking", 359 [THRESHOLD_STATUS_GREEN] "Green", 360 [THRESHOLD_STATUS_YELLOW] "Yellow", 361 [THRESHOLD_STATUS_RESERVED] "Reserved" 362}; 363 364static void 365mca_dump_bank(mca_state_t *state, int i) 366{ 367 mca_mci_bank_t *bank; 368 ia32_mci_status_t status; 369 370 bank = &state->mca_error_bank[i]; 371 status = bank->mca_mci_status; 372 kdb_printf( 373 " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n", 374 i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in")); 375 if (!status.bits.val) 376 return; 377 378 kdb_printf( 379 " MCA error code: 0x%04x\n", 380 status.bits.mca_error); 381 kdb_printf( 382 " Model specific error code: 0x%04x\n", 383 status.bits.model_specific_error); 384 if (!mca_threshold_status_present) { 385 kdb_printf( 386 " Other information: 0x%08x\n", 387 status.bits.other_information); 388 } else { 389 int threshold = status.bits_tes_p.threshold; 390 kdb_printf( 391 " Other information: 0x%08x\n" 392 " Threshold-based status: %s\n", 393 status.bits_tes_p.other_information, 394 (status.bits_tes_p.uc == 0) ? 395 mca_threshold_status[threshold] : 396 "Undefined"); 397 } 398 kdb_printf( 399 " Status bits:\n%s%s%s%s%s%s", 400 IF(status.bits.pcc, " Processor context corrupt\n"), 401 IF(status.bits.addrv, " ADDR register valid\n"), 402 IF(status.bits.miscv, " MISC register valid\n"), 403 IF(status.bits.en, " Error enabled\n"), 404 IF(status.bits.uc, " Uncorrected error\n"), 405 IF(status.bits.over, " Error overflow\n")); 406 if (status.bits.addrv) 407 kdb_printf( 408 " IA32_MC%d_ADDR(0x%x): 0x%016qx\n", 409 i, IA32_MCi_ADDR(i), bank->mca_mci_addr); 410 if (status.bits.miscv) 411 kdb_printf( 412 " IA32_MC%d_MISC(0x%x): 0x%016qx\n", 413 i, IA32_MCi_MISC(i), bank->mca_mci_misc); 414} 415 416static void 417mca_dump_error_banks(mca_state_t *state) 418{ 419 unsigned int i; 420 421 kdb_printf("MCA error-reporting registers:\n"); 422 for (i = 0; i < mca_error_bank_count; i++ ) { 423 if (i == 8) { 424 /* 425 * Fatal Memory Error 426 */ 427 428 /* Dump MC8 for local package */ 429 kdb_printf(" Package %d logged:\n", 430 x86_package()->ppkg_num); 431 mca_dump_bank_mc8(state, 8); 432 433 /* If there's other packages, report their MC8s */ 434 x86_pkg_t *pkg; 435 uint64_t deadline; 436 for (pkg = x86_pkgs; pkg != NULL; pkg = pkg->next) { 437 if (pkg == x86_package()) 438 continue; 439 deadline = mach_absolute_time() + LockTimeOut; 440 while (pkg->mca_state == NULL && 441 mach_absolute_time() < deadline) 442 cpu_pause(); 443 if (pkg->mca_state) { 444 kdb_printf(" Package %d logged:\n", 445 pkg->ppkg_num); 446 mca_dump_bank_mc8(pkg->mca_state, 8); 447 } else { 448 kdb_printf(" Package %d timed out!\n", 449 pkg->ppkg_num); 450 } 451 } 452 continue; 453 } 454 mca_dump_bank(state, i); 455 } 456} 457 458void 459mca_dump(void) 460{ 461 ia32_mcg_status_t status; 462 mca_state_t *mca_state = current_cpu_datap()->cpu_mca_state; 463 464 /* 465 * Capture local MCA registers to per-cpu data. 466 */ 467 mca_save_state(mca_state); 468 469 /* 470 * Serialize in case of multiple simultaneous machine-checks. 471 * Only the first caller is allowed to dump MCA registers, 472 * other threads spin meantime. 473 */ 474 simple_lock(&mca_lock); 475 if (mca_dump_state > CLEAR) { 476 simple_unlock(&mca_lock); 477 while (mca_dump_state == DUMPING) 478 cpu_pause(); 479 return; 480 } 481 mca_dump_state = DUMPING; 482 simple_unlock(&mca_lock); 483 484 /* 485 * Report machine-check capabilities: 486 */ 487 kdb_printf( 488 "Machine-check capabilities (cpu %d) 0x%016qx:\n", 489 cpu_number(), ia32_mcg_cap.u64); 490 491 mca_report_cpu_info(); 492 493 kdb_printf( 494 " %d error-reporting banks\n%s%s%s", mca_error_bank_count, 495 IF(mca_control_MSR_present, 496 " control MSR present\n"), 497 IF(mca_threshold_status_present, 498 " threshold-based error status present\n"), 499 IF(mca_cmci_present, 500 " extended corrected memory error handling present\n")); 501 if (mca_extended_MSRs_present) 502 kdb_printf( 503 " %d extended MSRs present\n", mca_extended_MSRs_count); 504 505 /* 506 * Report machine-check status: 507 */ 508 status.u64 = rdmsr64(IA32_MCG_STATUS); 509 kdb_printf( 510 "Machine-check status 0x%016qx:\n%s%s%s", status.u64, 511 IF(status.bits.ripv, " restart IP valid\n"), 512 IF(status.bits.eipv, " error IP valid\n"), 513 IF(status.bits.mcip, " machine-check in progress\n")); 514 515 /* 516 * Dump error-reporting registers: 517 */ 518 mca_dump_error_banks(mca_state); 519 520 /* 521 * Dump any extended machine state: 522 */ 523 if (mca_extended_MSRs_present) { 524 if (cpu_mode_is64bit()) 525 mca_dump_64bit_state(); 526 else 527 mca_dump_32bit_state(); 528 } 529 530 /* Update state to release any other threads. */ 531 mca_dump_state = DUMPED; 532} 533