mca.c revision 205214
1/*- 2 * Copyright (c) 2009 Advanced Computing Technologies LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* 29 * Support for x86 machine check architecture. 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/i386/i386/mca.c 205214 2010-03-16 16:01:19Z jhb $"); 34 35#include <sys/param.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/sched.h> 42#include <sys/smp.h> 43#include <sys/sysctl.h> 44#include <sys/systm.h> 45#include <sys/taskqueue.h> 46#include <machine/cputypes.h> 47#include <machine/mca.h> 48#include <machine/md_var.h> 49#include <machine/specialreg.h> 50 51struct mca_internal { 52 struct mca_record rec; 53 int logged; 54 STAILQ_ENTRY(mca_internal) link; 55}; 56 57static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 58 59static int mca_count; /* Number of records stored. */ 60 61SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); 62 63static int mca_enabled = 0; 64TUNABLE_INT("hw.mca.enabled", &mca_enabled); 65SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, 66 "Administrative toggle for machine check support"); 67 68static STAILQ_HEAD(, mca_internal) mca_records; 69static struct callout mca_timer; 70static int mca_ticks = 3600; /* Check hourly by default. */ 71static struct task mca_task; 72static struct mtx mca_lock; 73 74static int 75sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) 76{ 77 int error, value; 78 79 value = mca_ticks; 80 error = sysctl_handle_int(oidp, &value, 0, req); 81 if (error || req->newptr == NULL) 82 return (error); 83 if (value <= 0) 84 return (EINVAL); 85 mca_ticks = value; 86 return (0); 87} 88 89static int 90sysctl_mca_records(SYSCTL_HANDLER_ARGS) 91{ 92 int *name = (int *)arg1; 93 u_int namelen = arg2; 94 struct mca_record record; 95 struct mca_internal *rec; 96 int i; 97 98 if (namelen != 1) 99 return (EINVAL); 100 101 if (name[0] < 0 || name[0] >= mca_count) 102 return (EINVAL); 103 104 mtx_lock_spin(&mca_lock); 105 if (name[0] >= mca_count) { 106 mtx_unlock_spin(&mca_lock); 107 return (EINVAL); 108 } 109 i = 0; 110 STAILQ_FOREACH(rec, &mca_records, link) { 111 if (i == name[0]) { 112 record = rec->rec; 113 break; 114 } 115 i++; 116 } 117 mtx_unlock_spin(&mca_lock); 118 return (SYSCTL_OUT(req, &record, sizeof(record))); 119} 120 121static const char * 122mca_error_ttype(uint16_t mca_error) 123{ 124 125 switch ((mca_error & 0x000c) >> 2) { 126 case 0: 127 return ("I"); 128 case 1: 129 return ("D"); 130 case 2: 131 return ("G"); 132 } 133 return ("?"); 134} 135 136static const char * 137mca_error_level(uint16_t mca_error) 138{ 139 140 switch (mca_error & 0x0003) { 141 case 0: 142 return ("L0"); 143 case 1: 144 return ("L1"); 145 case 2: 146 return ("L2"); 147 case 3: 148 return ("LG"); 149 } 150 return ("L?"); 151} 152 153static const char * 154mca_error_request(uint16_t mca_error) 155{ 156 157 switch ((mca_error & 0x00f0) >> 4) { 158 case 0x0: 159 return ("ERR"); 160 case 0x1: 161 return ("RD"); 162 case 0x2: 163 return ("WR"); 164 case 0x3: 165 return ("DRD"); 166 case 0x4: 167 return ("DWR"); 168 case 0x5: 169 return ("IRD"); 170 case 0x6: 171 return ("PREFETCH"); 172 case 0x7: 173 return ("EVICT"); 174 case 0x8: 175 return ("SNOOP"); 176 } 177 return ("???"); 178} 179 180static const char * 181mca_error_mmtype(uint16_t mca_error) 182{ 183 184 switch ((mca_error & 0x70) >> 4) { 185 case 0x0: 186 return ("GEN"); 187 case 0x1: 188 return ("RD"); 189 case 0x2: 190 return ("WR"); 191 case 0x3: 192 return ("AC"); 193 case 0x4: 194 return ("MS"); 195 } 196 return ("???"); 197} 198 199/* Dump details about a single machine check. */ 200static void __nonnull(1) 201mca_log(const struct mca_record *rec) 202{ 203 uint16_t mca_error; 204 205 printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, 206 (long long)rec->mr_status); 207 printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", 208 (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); 209 printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, 210 rec->mr_cpu_id, rec->mr_apic_id); 211 printf("MCA: CPU %d ", rec->mr_cpu); 212 if (rec->mr_status & MC_STATUS_UC) 213 printf("UNCOR "); 214 else { 215 printf("COR "); 216 if (rec->mr_mcg_cap & MCG_CAP_TES_P) 217 printf("(%lld) ", ((long long)rec->mr_status & 218 MC_STATUS_COR_COUNT) >> 38); 219 } 220 if (rec->mr_status & MC_STATUS_PCC) 221 printf("PCC "); 222 if (rec->mr_status & MC_STATUS_OVER) 223 printf("OVER "); 224 mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; 225 switch (mca_error) { 226 /* Simple error codes. */ 227 case 0x0000: 228 printf("no error"); 229 break; 230 case 0x0001: 231 printf("unclassified error"); 232 break; 233 case 0x0002: 234 printf("ucode ROM parity error"); 235 break; 236 case 0x0003: 237 printf("external error"); 238 break; 239 case 0x0004: 240 printf("FRC error"); 241 break; 242 case 0x0005: 243 printf("internal parity error"); 244 break; 245 case 0x0400: 246 printf("internal timer error"); 247 break; 248 default: 249 if ((mca_error & 0xfc00) == 0x0400) { 250 printf("internal error %x", mca_error & 0x03ff); 251 break; 252 } 253 254 /* Compound error codes. */ 255 256 /* Memory hierarchy error. */ 257 if ((mca_error & 0xeffc) == 0x000c) { 258 printf("%s memory error", mca_error_level(mca_error)); 259 break; 260 } 261 262 /* TLB error. */ 263 if ((mca_error & 0xeff0) == 0x0010) { 264 printf("%sTLB %s error", mca_error_ttype(mca_error), 265 mca_error_level(mca_error)); 266 break; 267 } 268 269 /* Memory controller error. */ 270 if ((mca_error & 0xef80) == 0x0080) { 271 printf("%s channel ", mca_error_mmtype(mca_error)); 272 if ((mca_error & 0x000f) != 0x000f) 273 printf("%d", mca_error & 0x000f); 274 else 275 printf("??"); 276 printf(" memory error"); 277 break; 278 } 279 280 /* Cache error. */ 281 if ((mca_error & 0xef00) == 0x0100) { 282 printf("%sCACHE %s %s error", 283 mca_error_ttype(mca_error), 284 mca_error_level(mca_error), 285 mca_error_request(mca_error)); 286 break; 287 } 288 289 /* Bus and/or Interconnect error. */ 290 if ((mca_error & 0xe800) == 0x0800) { 291 printf("BUS%s ", mca_error_level(mca_error)); 292 switch ((mca_error & 0x0600) >> 9) { 293 case 0: 294 printf("Source"); 295 break; 296 case 1: 297 printf("Responder"); 298 break; 299 case 2: 300 printf("Observer"); 301 break; 302 default: 303 printf("???"); 304 break; 305 } 306 printf(" %s ", mca_error_request(mca_error)); 307 switch ((mca_error & 0x000c) >> 2) { 308 case 0: 309 printf("Memory"); 310 break; 311 case 2: 312 printf("I/O"); 313 break; 314 case 3: 315 printf("Other"); 316 break; 317 default: 318 printf("???"); 319 break; 320 } 321 if (mca_error & 0x0100) 322 printf(" timed out"); 323 break; 324 } 325 326 printf("unknown error %x", mca_error); 327 break; 328 } 329 printf("\n"); 330 if (rec->mr_status & MC_STATUS_ADDRV) 331 printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); 332 if (rec->mr_status & MC_STATUS_MISCV) 333 printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc); 334} 335 336static int __nonnull(2) 337mca_check_status(int bank, struct mca_record *rec) 338{ 339 uint64_t status; 340 u_int p[4]; 341 342 status = rdmsr(MSR_MC_STATUS(bank)); 343 if (!(status & MC_STATUS_VAL)) 344 return (0); 345 346 /* Save exception information. */ 347 rec->mr_status = status; 348 rec->mr_bank = bank; 349 rec->mr_addr = 0; 350 if (status & MC_STATUS_ADDRV) 351 rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); 352 rec->mr_misc = 0; 353 if (status & MC_STATUS_MISCV) 354 rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); 355 rec->mr_tsc = rdtsc(); 356 rec->mr_apic_id = PCPU_GET(apic_id); 357 rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP); 358 rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS); 359 rec->mr_cpu_id = cpu_id; 360 rec->mr_cpu_vendor_id = cpu_vendor_id; 361 rec->mr_cpu = PCPU_GET(cpuid); 362 363 /* 364 * Clear machine check. Don't do this for uncorrectable 365 * errors so that the BIOS can see them. 366 */ 367 if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { 368 wrmsr(MSR_MC_STATUS(bank), 0); 369 do_cpuid(0, p); 370 } 371 return (1); 372} 373 374static void __nonnull(1) 375mca_record_entry(const struct mca_record *record) 376{ 377 struct mca_internal *rec; 378 379 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT); 380 if (rec == NULL) { 381 printf("MCA: Unable to allocate space for an event.\n"); 382 mca_log(record); 383 return; 384 } 385 386 rec->rec = *record; 387 rec->logged = 0; 388 mtx_lock_spin(&mca_lock); 389 STAILQ_INSERT_TAIL(&mca_records, rec, link); 390 mca_count++; 391 mtx_unlock_spin(&mca_lock); 392} 393 394/* 395 * This scans all the machine check banks of the current CPU to see if 396 * there are any machine checks. Any non-recoverable errors are 397 * reported immediately via mca_log(). The current thread must be 398 * pinned when this is called. The 'mcip' parameter indicates if we 399 * are being called from the MC exception handler. In that case this 400 * function returns true if the system is restartable. Otherwise, it 401 * returns a count of the number of valid MC records found. 402 */ 403static int 404mca_scan(int mcip) 405{ 406 struct mca_record rec; 407 uint64_t mcg_cap, ucmask; 408 int count, i, recoverable; 409 410 count = 0; 411 recoverable = 1; 412 ucmask = MC_STATUS_UC | MC_STATUS_PCC; 413 414 /* When handling a MCE#, treat the OVER flag as non-restartable. */ 415 if (mcip) 416 ucmask |= MC_STATUS_OVER; 417 mcg_cap = rdmsr(MSR_MCG_CAP); 418 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 419 if (mca_check_status(i, &rec)) { 420 count++; 421 if (rec.mr_status & ucmask) { 422 recoverable = 0; 423 mca_log(&rec); 424 } 425 mca_record_entry(&rec); 426 } 427 } 428 return (mcip ? recoverable : count); 429} 430 431/* 432 * Scan the machine check banks on all CPUs by binding to each CPU in 433 * turn. If any of the CPUs contained new machine check records, log 434 * them to the console. 435 */ 436static void 437mca_scan_cpus(void *context, int pending) 438{ 439 struct mca_internal *mca; 440 struct thread *td; 441 int count, cpu; 442 443 td = curthread; 444 count = 0; 445 thread_lock(td); 446 for (cpu = 0; cpu <= mp_maxid; cpu++) { 447 if (CPU_ABSENT(cpu)) 448 continue; 449 sched_bind(td, cpu); 450 thread_unlock(td); 451 count += mca_scan(0); 452 thread_lock(td); 453 sched_unbind(td); 454 } 455 thread_unlock(td); 456 if (count != 0) { 457 mtx_lock_spin(&mca_lock); 458 STAILQ_FOREACH(mca, &mca_records, link) { 459 if (!mca->logged) { 460 mca->logged = 1; 461 mtx_unlock_spin(&mca_lock); 462 mca_log(&mca->rec); 463 mtx_lock_spin(&mca_lock); 464 } 465 } 466 mtx_unlock_spin(&mca_lock); 467 } 468} 469 470static void 471mca_periodic_scan(void *arg) 472{ 473 474 taskqueue_enqueue(taskqueue_thread, &mca_task); 475 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); 476} 477 478static int 479sysctl_mca_scan(SYSCTL_HANDLER_ARGS) 480{ 481 int error, i; 482 483 i = 0; 484 error = sysctl_handle_int(oidp, &i, 0, req); 485 if (error) 486 return (error); 487 if (i) 488 taskqueue_enqueue(taskqueue_thread, &mca_task); 489 return (0); 490} 491 492static void 493mca_startup(void *dummy) 494{ 495 496 if (!mca_enabled || !(cpu_feature & CPUID_MCA)) 497 return; 498 499 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, 500 NULL); 501} 502SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); 503 504static void 505mca_setup(void) 506{ 507 508 mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); 509 STAILQ_INIT(&mca_records); 510 TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); 511 callout_init(&mca_timer, CALLOUT_MPSAFE); 512 SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 513 "count", CTLFLAG_RD, &mca_count, 0, "Record count"); 514 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 515 "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 516 0, sysctl_mca_ticks, "I", 517 "Periodic interval in seconds to scan for machine checks"); 518 SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 519 "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); 520 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 521 "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 522 sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); 523} 524 525/* Must be executed on each CPU. */ 526void 527mca_init(void) 528{ 529 uint64_t mcg_cap; 530 uint64_t ctl; 531 int skip; 532 int i; 533 534 /* MCE is required. */ 535 if (!mca_enabled || !(cpu_feature & CPUID_MCE)) 536 return; 537 538 if (cpu_feature & CPUID_MCA) { 539 if (PCPU_GET(cpuid) == 0) 540 mca_setup(); 541 542 sched_pin(); 543 mcg_cap = rdmsr(MSR_MCG_CAP); 544 if (mcg_cap & MCG_CAP_CTL_P) 545 /* Enable MCA features. */ 546 wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); 547 548 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 549 /* By default enable logging of all errors. */ 550 ctl = 0xffffffffffffffffUL; 551 skip = 0; 552 553 if (cpu_vendor_id == CPU_VENDOR_INTEL) { 554 /* 555 * For P6 models before Nehalem MC0_CTL is 556 * always enabled and reserved. 557 */ 558 if (i == 0 && CPUID_TO_FAMILY(cpu_id) == 0x6 559 && CPUID_TO_MODEL(cpu_id) < 0x1a) 560 skip = 1; 561 } else if (cpu_vendor_id == CPU_VENDOR_AMD) { 562 /* BKDG for Family 10h: unset GartTblWkEn. */ 563 if (i == 4 && CPUID_TO_FAMILY(cpu_id) >= 0xf) 564 ctl &= ~(1UL << 10); 565 } 566 567 if (!skip) 568 wrmsr(MSR_MC_CTL(i), ctl); 569 /* Clear all errors. */ 570 wrmsr(MSR_MC_STATUS(i), 0); 571 } 572 sched_unpin(); 573 } 574 575 load_cr4(rcr4() | CR4_MCE); 576} 577 578/* Called when a machine check exception fires. */ 579int 580mca_intr(void) 581{ 582 uint64_t mcg_status; 583 int recoverable; 584 585 if (!(cpu_feature & CPUID_MCA)) { 586 /* 587 * Just print the values of the old Pentium registers 588 * and panic. 589 */ 590 printf("MC Type: 0x%llx Address: 0x%llx\n", 591 rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); 592 return (0); 593 } 594 595 /* Scan the banks and check for any non-recoverable errors. */ 596 recoverable = mca_scan(1); 597 mcg_status = rdmsr(MSR_MCG_STATUS); 598 if (!(mcg_status & MCG_STATUS_RIPV)) 599 recoverable = 0; 600 601 /* Clear MCIP. */ 602 wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); 603 return (recoverable); 604} 605