/* mca.c revision 205573 */
1/*- 2 * Copyright (c) 2009 Advanced Computing Technologies LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* 29 * Support for x86 machine check architecture. 
30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/i386/i386/mca.c 205573 2010-03-24 03:07:35Z alc $"); 34 35#include <sys/param.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/sched.h> 42#include <sys/smp.h> 43#include <sys/sysctl.h> 44#include <sys/systm.h> 45#include <sys/taskqueue.h> 46#include <machine/cputypes.h> 47#include <machine/mca.h> 48#include <machine/md_var.h> 49#include <machine/specialreg.h> 50 51struct mca_internal { 52 struct mca_record rec; 53 int logged; 54 STAILQ_ENTRY(mca_internal) link; 55}; 56 57static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 58 59static int mca_count; /* Number of records stored. */ 60 61SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); 62 63static int mca_enabled = 1; 64TUNABLE_INT("hw.mca.enabled", &mca_enabled); 65SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, 66 "Administrative toggle for machine check support"); 67 68static int amd10h_L1TP = 1; 69TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP); 70SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0, 71 "Administrative toggle for logging of level one TLB parity (L1TP) errors"); 72 73int workaround_erratum383; 74SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0, 75 "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?"); 76 77static STAILQ_HEAD(, mca_internal) mca_records; 78static struct callout mca_timer; 79static int mca_ticks = 3600; /* Check hourly by default. 
*/ 80static struct task mca_task; 81static struct mtx mca_lock; 82 83static int 84sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) 85{ 86 int error, value; 87 88 value = mca_ticks; 89 error = sysctl_handle_int(oidp, &value, 0, req); 90 if (error || req->newptr == NULL) 91 return (error); 92 if (value <= 0) 93 return (EINVAL); 94 mca_ticks = value; 95 return (0); 96} 97 98static int 99sysctl_mca_records(SYSCTL_HANDLER_ARGS) 100{ 101 int *name = (int *)arg1; 102 u_int namelen = arg2; 103 struct mca_record record; 104 struct mca_internal *rec; 105 int i; 106 107 if (namelen != 1) 108 return (EINVAL); 109 110 if (name[0] < 0 || name[0] >= mca_count) 111 return (EINVAL); 112 113 mtx_lock_spin(&mca_lock); 114 if (name[0] >= mca_count) { 115 mtx_unlock_spin(&mca_lock); 116 return (EINVAL); 117 } 118 i = 0; 119 STAILQ_FOREACH(rec, &mca_records, link) { 120 if (i == name[0]) { 121 record = rec->rec; 122 break; 123 } 124 i++; 125 } 126 mtx_unlock_spin(&mca_lock); 127 return (SYSCTL_OUT(req, &record, sizeof(record))); 128} 129 130static const char * 131mca_error_ttype(uint16_t mca_error) 132{ 133 134 switch ((mca_error & 0x000c) >> 2) { 135 case 0: 136 return ("I"); 137 case 1: 138 return ("D"); 139 case 2: 140 return ("G"); 141 } 142 return ("?"); 143} 144 145static const char * 146mca_error_level(uint16_t mca_error) 147{ 148 149 switch (mca_error & 0x0003) { 150 case 0: 151 return ("L0"); 152 case 1: 153 return ("L1"); 154 case 2: 155 return ("L2"); 156 case 3: 157 return ("LG"); 158 } 159 return ("L?"); 160} 161 162static const char * 163mca_error_request(uint16_t mca_error) 164{ 165 166 switch ((mca_error & 0x00f0) >> 4) { 167 case 0x0: 168 return ("ERR"); 169 case 0x1: 170 return ("RD"); 171 case 0x2: 172 return ("WR"); 173 case 0x3: 174 return ("DRD"); 175 case 0x4: 176 return ("DWR"); 177 case 0x5: 178 return ("IRD"); 179 case 0x6: 180 return ("PREFETCH"); 181 case 0x7: 182 return ("EVICT"); 183 case 0x8: 184 return ("SNOOP"); 185 } 186 return ("???"); 187} 188 189static const 
char * 190mca_error_mmtype(uint16_t mca_error) 191{ 192 193 switch ((mca_error & 0x70) >> 4) { 194 case 0x0: 195 return ("GEN"); 196 case 0x1: 197 return ("RD"); 198 case 0x2: 199 return ("WR"); 200 case 0x3: 201 return ("AC"); 202 case 0x4: 203 return ("MS"); 204 } 205 return ("???"); 206} 207 208/* Dump details about a single machine check. */ 209static void __nonnull(1) 210mca_log(const struct mca_record *rec) 211{ 212 uint16_t mca_error; 213 214 printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, 215 (long long)rec->mr_status); 216 printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", 217 (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); 218 printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, 219 rec->mr_cpu_id, rec->mr_apic_id); 220 printf("MCA: CPU %d ", rec->mr_cpu); 221 if (rec->mr_status & MC_STATUS_UC) 222 printf("UNCOR "); 223 else { 224 printf("COR "); 225 if (rec->mr_mcg_cap & MCG_CAP_TES_P) 226 printf("(%lld) ", ((long long)rec->mr_status & 227 MC_STATUS_COR_COUNT) >> 38); 228 } 229 if (rec->mr_status & MC_STATUS_PCC) 230 printf("PCC "); 231 if (rec->mr_status & MC_STATUS_OVER) 232 printf("OVER "); 233 mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; 234 switch (mca_error) { 235 /* Simple error codes. */ 236 case 0x0000: 237 printf("no error"); 238 break; 239 case 0x0001: 240 printf("unclassified error"); 241 break; 242 case 0x0002: 243 printf("ucode ROM parity error"); 244 break; 245 case 0x0003: 246 printf("external error"); 247 break; 248 case 0x0004: 249 printf("FRC error"); 250 break; 251 case 0x0005: 252 printf("internal parity error"); 253 break; 254 case 0x0400: 255 printf("internal timer error"); 256 break; 257 default: 258 if ((mca_error & 0xfc00) == 0x0400) { 259 printf("internal error %x", mca_error & 0x03ff); 260 break; 261 } 262 263 /* Compound error codes. */ 264 265 /* Memory hierarchy error. 
*/ 266 if ((mca_error & 0xeffc) == 0x000c) { 267 printf("%s memory error", mca_error_level(mca_error)); 268 break; 269 } 270 271 /* TLB error. */ 272 if ((mca_error & 0xeff0) == 0x0010) { 273 printf("%sTLB %s error", mca_error_ttype(mca_error), 274 mca_error_level(mca_error)); 275 break; 276 } 277 278 /* Memory controller error. */ 279 if ((mca_error & 0xef80) == 0x0080) { 280 printf("%s channel ", mca_error_mmtype(mca_error)); 281 if ((mca_error & 0x000f) != 0x000f) 282 printf("%d", mca_error & 0x000f); 283 else 284 printf("??"); 285 printf(" memory error"); 286 break; 287 } 288 289 /* Cache error. */ 290 if ((mca_error & 0xef00) == 0x0100) { 291 printf("%sCACHE %s %s error", 292 mca_error_ttype(mca_error), 293 mca_error_level(mca_error), 294 mca_error_request(mca_error)); 295 break; 296 } 297 298 /* Bus and/or Interconnect error. */ 299 if ((mca_error & 0xe800) == 0x0800) { 300 printf("BUS%s ", mca_error_level(mca_error)); 301 switch ((mca_error & 0x0600) >> 9) { 302 case 0: 303 printf("Source"); 304 break; 305 case 1: 306 printf("Responder"); 307 break; 308 case 2: 309 printf("Observer"); 310 break; 311 default: 312 printf("???"); 313 break; 314 } 315 printf(" %s ", mca_error_request(mca_error)); 316 switch ((mca_error & 0x000c) >> 2) { 317 case 0: 318 printf("Memory"); 319 break; 320 case 2: 321 printf("I/O"); 322 break; 323 case 3: 324 printf("Other"); 325 break; 326 default: 327 printf("???"); 328 break; 329 } 330 if (mca_error & 0x0100) 331 printf(" timed out"); 332 break; 333 } 334 335 printf("unknown error %x", mca_error); 336 break; 337 } 338 printf("\n"); 339 if (rec->mr_status & MC_STATUS_ADDRV) 340 printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); 341 if (rec->mr_status & MC_STATUS_MISCV) 342 printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc); 343} 344 345static int __nonnull(2) 346mca_check_status(int bank, struct mca_record *rec) 347{ 348 uint64_t status; 349 u_int p[4]; 350 351 status = rdmsr(MSR_MC_STATUS(bank)); 352 if (!(status & 
MC_STATUS_VAL)) 353 return (0); 354 355 /* Save exception information. */ 356 rec->mr_status = status; 357 rec->mr_bank = bank; 358 rec->mr_addr = 0; 359 if (status & MC_STATUS_ADDRV) 360 rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); 361 rec->mr_misc = 0; 362 if (status & MC_STATUS_MISCV) 363 rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); 364 rec->mr_tsc = rdtsc(); 365 rec->mr_apic_id = PCPU_GET(apic_id); 366 rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP); 367 rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS); 368 rec->mr_cpu_id = cpu_id; 369 rec->mr_cpu_vendor_id = cpu_vendor_id; 370 rec->mr_cpu = PCPU_GET(cpuid); 371 372 /* 373 * Clear machine check. Don't do this for uncorrectable 374 * errors so that the BIOS can see them. 375 */ 376 if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { 377 wrmsr(MSR_MC_STATUS(bank), 0); 378 do_cpuid(0, p); 379 } 380 return (1); 381} 382 383static void __nonnull(1) 384mca_record_entry(const struct mca_record *record) 385{ 386 struct mca_internal *rec; 387 388 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT); 389 if (rec == NULL) { 390 printf("MCA: Unable to allocate space for an event.\n"); 391 mca_log(record); 392 return; 393 } 394 395 rec->rec = *record; 396 rec->logged = 0; 397 mtx_lock_spin(&mca_lock); 398 STAILQ_INSERT_TAIL(&mca_records, rec, link); 399 mca_count++; 400 mtx_unlock_spin(&mca_lock); 401} 402 403/* 404 * This scans all the machine check banks of the current CPU to see if 405 * there are any machine checks. Any non-recoverable errors are 406 * reported immediately via mca_log(). The current thread must be 407 * pinned when this is called. The 'mcip' parameter indicates if we 408 * are being called from the MC exception handler. In that case this 409 * function returns true if the system is restartable. Otherwise, it 410 * returns a count of the number of valid MC records found. 
411 */ 412static int 413mca_scan(int mcip) 414{ 415 struct mca_record rec; 416 uint64_t mcg_cap, ucmask; 417 int count, i, recoverable; 418 419 count = 0; 420 recoverable = 1; 421 ucmask = MC_STATUS_UC | MC_STATUS_PCC; 422 423 /* When handling a MCE#, treat the OVER flag as non-restartable. */ 424 if (mcip) 425 ucmask |= MC_STATUS_OVER; 426 mcg_cap = rdmsr(MSR_MCG_CAP); 427 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 428 if (mca_check_status(i, &rec)) { 429 count++; 430 if (rec.mr_status & ucmask) { 431 recoverable = 0; 432 mca_log(&rec); 433 } 434 mca_record_entry(&rec); 435 } 436 } 437 return (mcip ? recoverable : count); 438} 439 440/* 441 * Scan the machine check banks on all CPUs by binding to each CPU in 442 * turn. If any of the CPUs contained new machine check records, log 443 * them to the console. 444 */ 445static void 446mca_scan_cpus(void *context, int pending) 447{ 448 struct mca_internal *mca; 449 struct thread *td; 450 int count, cpu; 451 452 td = curthread; 453 count = 0; 454 thread_lock(td); 455 for (cpu = 0; cpu <= mp_maxid; cpu++) { 456 if (CPU_ABSENT(cpu)) 457 continue; 458 sched_bind(td, cpu); 459 thread_unlock(td); 460 count += mca_scan(0); 461 thread_lock(td); 462 sched_unbind(td); 463 } 464 thread_unlock(td); 465 if (count != 0) { 466 mtx_lock_spin(&mca_lock); 467 STAILQ_FOREACH(mca, &mca_records, link) { 468 if (!mca->logged) { 469 mca->logged = 1; 470 mtx_unlock_spin(&mca_lock); 471 mca_log(&mca->rec); 472 mtx_lock_spin(&mca_lock); 473 } 474 } 475 mtx_unlock_spin(&mca_lock); 476 } 477} 478 479static void 480mca_periodic_scan(void *arg) 481{ 482 483 taskqueue_enqueue(taskqueue_thread, &mca_task); 484 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); 485} 486 487static int 488sysctl_mca_scan(SYSCTL_HANDLER_ARGS) 489{ 490 int error, i; 491 492 i = 0; 493 error = sysctl_handle_int(oidp, &i, 0, req); 494 if (error) 495 return (error); 496 if (i) 497 taskqueue_enqueue(taskqueue_thread, &mca_task); 498 return (0); 499} 500 
501static void 502mca_startup(void *dummy) 503{ 504 505 if (!mca_enabled || !(cpu_feature & CPUID_MCA)) 506 return; 507 508 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, 509 NULL); 510} 511SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); 512 513static void 514mca_setup(void) 515{ 516 517 mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); 518 STAILQ_INIT(&mca_records); 519 TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); 520 callout_init(&mca_timer, CALLOUT_MPSAFE); 521 SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 522 "count", CTLFLAG_RD, &mca_count, 0, "Record count"); 523 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 524 "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 525 0, sysctl_mca_ticks, "I", 526 "Periodic interval in seconds to scan for machine checks"); 527 SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 528 "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); 529 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 530 "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 531 sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); 532} 533 534/* Must be executed on each CPU. */ 535void 536mca_init(void) 537{ 538 uint64_t mcg_cap; 539 uint64_t ctl, mask; 540 int skip; 541 int i; 542 543 /* MCE is required. */ 544 if (!mca_enabled || !(cpu_feature & CPUID_MCE)) 545 return; 546 547 /* 548 * On AMD Family 10h processors, unless logging of level one TLB 549 * parity (L1TP) errors is disabled, enable the recommended workaround 550 * for Erratum 383. 551 */ 552 if (cpu_vendor_id == CPU_VENDOR_AMD && 553 CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP) 554 workaround_erratum383 = 1; 555 556 if (cpu_feature & CPUID_MCA) { 557 if (PCPU_GET(cpuid) == 0) 558 mca_setup(); 559 560 sched_pin(); 561 mcg_cap = rdmsr(MSR_MCG_CAP); 562 if (mcg_cap & MCG_CAP_CTL_P) 563 /* Enable MCA features. 
*/ 564 wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); 565 566 /* 567 * Disable logging of level one TLB parity (L1TP) errors by 568 * the data cache as an alternative workaround for AMD Family 569 * 10h Erratum 383. Unlike the recommended workaround, there 570 * is no performance penalty to this workaround. However, 571 * L1TP errors will go unreported. 572 */ 573 if (cpu_vendor_id == CPU_VENDOR_AMD && 574 CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) { 575 mask = rdmsr(MSR_MC0_CTL_MASK); 576 if ((mask & (1UL << 5)) == 0) 577 wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5)); 578 } 579 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 580 /* By default enable logging of all errors. */ 581 ctl = 0xffffffffffffffffUL; 582 skip = 0; 583 584 if (cpu_vendor_id == CPU_VENDOR_INTEL) { 585 /* 586 * For P6 models before Nehalem MC0_CTL is 587 * always enabled and reserved. 588 */ 589 if (i == 0 && CPUID_TO_FAMILY(cpu_id) == 0x6 590 && CPUID_TO_MODEL(cpu_id) < 0x1a) 591 skip = 1; 592 } else if (cpu_vendor_id == CPU_VENDOR_AMD) { 593 /* BKDG for Family 10h: unset GartTblWkEn. */ 594 if (i == 4 && CPUID_TO_FAMILY(cpu_id) >= 0xf) 595 ctl &= ~(1UL << 10); 596 } 597 598 if (!skip) 599 wrmsr(MSR_MC_CTL(i), ctl); 600 /* Clear all errors. */ 601 wrmsr(MSR_MC_STATUS(i), 0); 602 } 603 sched_unpin(); 604 } 605 606 load_cr4(rcr4() | CR4_MCE); 607} 608 609/* Called when a machine check exception fires. */ 610int 611mca_intr(void) 612{ 613 uint64_t mcg_status; 614 int recoverable; 615 616 if (!(cpu_feature & CPUID_MCA)) { 617 /* 618 * Just print the values of the old Pentium registers 619 * and panic. 620 */ 621 printf("MC Type: 0x%llx Address: 0x%llx\n", 622 rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); 623 return (0); 624 } 625 626 /* Scan the banks and check for any non-recoverable errors. */ 627 recoverable = mca_scan(1); 628 mcg_status = rdmsr(MSR_MCG_STATUS); 629 if (!(mcg_status & MCG_STATUS_RIPV)) 630 recoverable = 0; 631 632 /* Clear MCIP. 
*/ 633 wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); 634 return (recoverable); 635} 636