mca.c revision 200064
/*-
 * Copyright (c) 2009 Advanced Computing Technologies LLC
 * Written by: John H. Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Support for x86 machine check architecture.
30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/i386/i386/mca.c 200064 2009-12-03 16:10:21Z avg $"); 34 35#include <sys/param.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/sched.h> 42#include <sys/smp.h> 43#include <sys/sysctl.h> 44#include <sys/systm.h> 45#include <sys/taskqueue.h> 46#include <machine/cputypes.h> 47#include <machine/mca.h> 48#include <machine/md_var.h> 49#include <machine/specialreg.h> 50 51struct mca_internal { 52 struct mca_record rec; 53 int logged; 54 STAILQ_ENTRY(mca_internal) link; 55}; 56 57static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 58 59static int mca_count; /* Number of records stored. */ 60 61SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); 62 63static int mca_enabled = 0; 64TUNABLE_INT("hw.mca.enabled", &mca_enabled); 65SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, 66 "Administrative toggle for machine check support"); 67 68static STAILQ_HEAD(, mca_internal) mca_records; 69static struct callout mca_timer; 70static int mca_ticks = 3600; /* Check hourly by default. 
*/ 71static struct task mca_task; 72static struct mtx mca_lock; 73 74static int 75sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) 76{ 77 int error, value; 78 79 value = mca_ticks; 80 error = sysctl_handle_int(oidp, &value, 0, req); 81 if (error || req->newptr == NULL) 82 return (error); 83 if (value <= 0) 84 return (EINVAL); 85 mca_ticks = value; 86 return (0); 87} 88 89static int 90sysctl_mca_records(SYSCTL_HANDLER_ARGS) 91{ 92 int *name = (int *)arg1; 93 u_int namelen = arg2; 94 struct mca_record record; 95 struct mca_internal *rec; 96 int i; 97 98 if (namelen != 1) 99 return (EINVAL); 100 101 if (name[0] < 0 || name[0] >= mca_count) 102 return (EINVAL); 103 104 mtx_lock_spin(&mca_lock); 105 if (name[0] >= mca_count) { 106 mtx_unlock_spin(&mca_lock); 107 return (EINVAL); 108 } 109 i = 0; 110 STAILQ_FOREACH(rec, &mca_records, link) { 111 if (i == name[0]) { 112 record = rec->rec; 113 break; 114 } 115 i++; 116 } 117 mtx_unlock_spin(&mca_lock); 118 return (SYSCTL_OUT(req, &record, sizeof(record))); 119} 120 121static const char * 122mca_error_ttype(uint16_t mca_error) 123{ 124 125 switch ((mca_error & 0x000c) >> 2) { 126 case 0: 127 return ("I"); 128 case 1: 129 return ("D"); 130 case 2: 131 return ("G"); 132 } 133 return ("?"); 134} 135 136static const char * 137mca_error_level(uint16_t mca_error) 138{ 139 140 switch (mca_error & 0x0003) { 141 case 0: 142 return ("L0"); 143 case 1: 144 return ("L1"); 145 case 2: 146 return ("L2"); 147 case 3: 148 return ("LG"); 149 } 150 return ("L?"); 151} 152 153static const char * 154mca_error_request(uint16_t mca_error) 155{ 156 157 switch ((mca_error & 0x00f0) >> 4) { 158 case 0x0: 159 return ("ERR"); 160 case 0x1: 161 return ("RD"); 162 case 0x2: 163 return ("WR"); 164 case 0x3: 165 return ("DRD"); 166 case 0x4: 167 return ("DWR"); 168 case 0x5: 169 return ("IRD"); 170 case 0x6: 171 return ("PREFETCH"); 172 case 0x7: 173 return ("EVICT"); 174 case 0x8: 175 return ("SNOOP"); 176 } 177 return ("???"); 178} 179 180/* Dump details about a 
single machine check. */ 181static void __nonnull(1) 182mca_log(const struct mca_record *rec) 183{ 184 uint16_t mca_error; 185 186 printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank, 187 (long long)rec->mr_status); 188 printf("MCA: CPU %d ", rec->mr_apic_id); 189 if (rec->mr_status & MC_STATUS_UC) 190 printf("UNCOR "); 191 else 192 printf("COR "); 193 if (rec->mr_status & MC_STATUS_PCC) 194 printf("PCC "); 195 if (rec->mr_status & MC_STATUS_OVER) 196 printf("OVER "); 197 mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; 198 switch (mca_error) { 199 /* Simple error codes. */ 200 case 0x0000: 201 printf("no error"); 202 break; 203 case 0x0001: 204 printf("unclassified error"); 205 break; 206 case 0x0002: 207 printf("ucode ROM parity error"); 208 break; 209 case 0x0003: 210 printf("external error"); 211 break; 212 case 0x0004: 213 printf("FRC error"); 214 break; 215 case 0x0400: 216 printf("internal timer error"); 217 break; 218 default: 219 if ((mca_error & 0xfc00) == 0x0400) { 220 printf("internal error %x", mca_error & 0x03ff); 221 break; 222 } 223 224 /* Compound error codes. */ 225 226 /* Memory hierarchy error. */ 227 if ((mca_error & 0xeffc) == 0x000c) { 228 printf("%s memory error", mca_error_level(mca_error)); 229 break; 230 } 231 232 /* TLB error. */ 233 if ((mca_error & 0xeff0) == 0x0010) { 234 printf("%sTLB %s error", mca_error_ttype(mca_error), 235 mca_error_level(mca_error)); 236 break; 237 } 238 239 /* Cache error. */ 240 if ((mca_error & 0xef00) == 0x0100) { 241 printf("%sCACHE %s %s error", 242 mca_error_ttype(mca_error), 243 mca_error_level(mca_error), 244 mca_error_request(mca_error)); 245 break; 246 } 247 248 /* Bus and/or Interconnect error. 
*/ 249 if ((mca_error & 0xe800) == 0x0800) { 250 printf("BUS%s ", mca_error_level(mca_error)); 251 switch ((mca_error & 0x0600) >> 9) { 252 case 0: 253 printf("Source"); 254 break; 255 case 1: 256 printf("Responder"); 257 break; 258 case 2: 259 printf("Observer"); 260 break; 261 default: 262 printf("???"); 263 break; 264 } 265 printf(" %s ", mca_error_request(mca_error)); 266 switch ((mca_error & 0x000c) >> 2) { 267 case 0: 268 printf("Memory"); 269 break; 270 case 2: 271 printf("I/O"); 272 break; 273 case 3: 274 printf("Other"); 275 break; 276 default: 277 printf("???"); 278 break; 279 } 280 if (mca_error & 0x0100) 281 printf(" timed out"); 282 break; 283 } 284 285 printf("unknown error %x", mca_error); 286 break; 287 } 288 printf("\n"); 289 if (rec->mr_status & MC_STATUS_ADDRV) 290 printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); 291} 292 293static int __nonnull(2) 294mca_check_status(int bank, struct mca_record *rec) 295{ 296 uint64_t status; 297 u_int p[4]; 298 299 status = rdmsr(MSR_MC_STATUS(bank)); 300 if (!(status & MC_STATUS_VAL)) 301 return (0); 302 303 /* Save exception information. */ 304 rec->mr_status = status; 305 rec->mr_bank = bank; 306 rec->mr_addr = 0; 307 if (status & MC_STATUS_ADDRV) 308 rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); 309 rec->mr_misc = 0; 310 if (status & MC_STATUS_MISCV) 311 rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); 312 rec->mr_tsc = rdtsc(); 313 rec->mr_apic_id = PCPU_GET(apic_id); 314 315 /* 316 * Clear machine check. Don't do this for uncorrectable 317 * errors so that the BIOS can see them. 
318 */ 319 if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { 320 wrmsr(MSR_MC_STATUS(bank), 0); 321 do_cpuid(0, p); 322 } 323 return (1); 324} 325 326static void __nonnull(1) 327mca_record_entry(const struct mca_record *record) 328{ 329 struct mca_internal *rec; 330 331 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT); 332 if (rec == NULL) { 333 printf("MCA: Unable to allocate space for an event.\n"); 334 mca_log(record); 335 return; 336 } 337 338 rec->rec = *record; 339 rec->logged = 0; 340 mtx_lock_spin(&mca_lock); 341 STAILQ_INSERT_TAIL(&mca_records, rec, link); 342 mca_count++; 343 mtx_unlock_spin(&mca_lock); 344} 345 346/* 347 * This scans all the machine check banks of the current CPU to see if 348 * there are any machine checks. Any non-recoverable errors are 349 * reported immediately via mca_log(). The current thread must be 350 * pinned when this is called. The 'mcip' parameter indicates if we 351 * are being called from the MC exception handler. In that case this 352 * function returns true if the system is restartable. Otherwise, it 353 * returns a count of the number of valid MC records found. 354 */ 355static int 356mca_scan(int mcip) 357{ 358 struct mca_record rec; 359 uint64_t mcg_cap, ucmask; 360 int count, i, recoverable; 361 362 count = 0; 363 recoverable = 1; 364 ucmask = MC_STATUS_UC | MC_STATUS_PCC; 365 366 /* When handling a MCE#, treat the OVER flag as non-restartable. */ 367 if (mcip) 368 ucmask |= MC_STATUS_OVER; 369 mcg_cap = rdmsr(MSR_MCG_CAP); 370 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 371 if (mca_check_status(i, &rec)) { 372 count++; 373 if (rec.mr_status & ucmask) { 374 recoverable = 0; 375 mca_log(&rec); 376 } 377 mca_record_entry(&rec); 378 } 379 } 380 return (mcip ? recoverable : count); 381} 382 383/* 384 * Scan the machine check banks on all CPUs by binding to each CPU in 385 * turn. If any of the CPUs contained new machine check records, log 386 * them to the console. 
387 */ 388static void 389mca_scan_cpus(void *context, int pending) 390{ 391 struct mca_internal *mca; 392 struct thread *td; 393 int count, cpu; 394 395 td = curthread; 396 count = 0; 397 thread_lock(td); 398 for (cpu = 0; cpu <= mp_maxid; cpu++) { 399 if (CPU_ABSENT(cpu)) 400 continue; 401 sched_bind(td, cpu); 402 thread_unlock(td); 403 count += mca_scan(0); 404 thread_lock(td); 405 sched_unbind(td); 406 } 407 thread_unlock(td); 408 if (count != 0) { 409 mtx_lock_spin(&mca_lock); 410 STAILQ_FOREACH(mca, &mca_records, link) { 411 if (!mca->logged) { 412 mca->logged = 1; 413 mtx_unlock_spin(&mca_lock); 414 mca_log(&mca->rec); 415 mtx_lock_spin(&mca_lock); 416 } 417 } 418 mtx_unlock_spin(&mca_lock); 419 } 420} 421 422static void 423mca_periodic_scan(void *arg) 424{ 425 426 taskqueue_enqueue(taskqueue_thread, &mca_task); 427 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); 428} 429 430static int 431sysctl_mca_scan(SYSCTL_HANDLER_ARGS) 432{ 433 int error, i; 434 435 i = 0; 436 error = sysctl_handle_int(oidp, &i, 0, req); 437 if (error) 438 return (error); 439 if (i) 440 taskqueue_enqueue(taskqueue_thread, &mca_task); 441 return (0); 442} 443 444static void 445mca_startup(void *dummy) 446{ 447 448 if (!mca_enabled || !(cpu_feature & CPUID_MCA)) 449 return; 450 451 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, 452 NULL); 453} 454SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); 455 456static void 457mca_setup(void) 458{ 459 460 mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); 461 STAILQ_INIT(&mca_records); 462 TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); 463 callout_init(&mca_timer, CALLOUT_MPSAFE); 464 SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 465 "count", CTLFLAG_RD, &mca_count, 0, "Record count"); 466 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 467 "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 468 0, sysctl_mca_ticks, "I", 469 "Periodic interval in 
seconds to scan for machine checks"); 470 SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 471 "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); 472 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 473 "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 474 sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); 475} 476 477/* Must be executed on each CPU. */ 478void 479mca_init(void) 480{ 481 uint64_t mcg_cap; 482 uint64_t ctl; 483 int skip; 484 int i; 485 486 /* MCE is required. */ 487 if (!mca_enabled || !(cpu_feature & CPUID_MCE)) 488 return; 489 490 if (cpu_feature & CPUID_MCA) { 491 if (PCPU_GET(cpuid) == 0) 492 mca_setup(); 493 494 sched_pin(); 495 mcg_cap = rdmsr(MSR_MCG_CAP); 496 if (mcg_cap & MCG_CAP_CTL_P) 497 /* Enable MCA features. */ 498 wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); 499 500 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 501 /* By default enable logging of all errors. */ 502 ctl = 0xffffffffffffffffUL; 503 skip = 0; 504 505 if (cpu_vendor_id == CPU_VENDOR_INTEL) { 506 /* 507 * For P6 models before Nehalem MC0_CTL is 508 * always enabled and reserved. 509 */ 510 if (i == 0 && CPUID_TO_FAMILY(cpu_id) == 0x6 511 && CPUID_TO_MODEL(cpu_id) < 0x1a) 512 skip = 1; 513 } else if (cpu_vendor_id == CPU_VENDOR_AMD) { 514 /* BKDG for Family 10h: unset GartTblWkEn. */ 515 if (i == 4 && CPUID_TO_FAMILY(cpu_id) >= 0xf) 516 ctl &= ~(1UL << 10); 517 } 518 519 if (!skip) 520 wrmsr(MSR_MC_CTL(i), ctl); 521 /* Clear all errors. */ 522 wrmsr(MSR_MC_STATUS(i), 0); 523 } 524 sched_unpin(); 525 } 526 527 load_cr4(rcr4() | CR4_MCE); 528} 529 530/* Called when a machine check exception fires. */ 531int 532mca_intr(void) 533{ 534 uint64_t mcg_status; 535 int recoverable; 536 537 if (!(cpu_feature & CPUID_MCA)) { 538 /* 539 * Just print the values of the old Pentium registers 540 * and panic. 
541 */ 542 printf("MC Type: 0x%llx Address: 0x%llx\n", 543 rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); 544 return (0); 545 } 546 547 /* Scan the banks and check for any non-recoverable errors. */ 548 recoverable = mca_scan(1); 549 mcg_status = rdmsr(MSR_MCG_STATUS); 550 if (!(mcg_status & MCG_STATUS_RIPV)) 551 recoverable = 0; 552 553 /* Clear MCIP. */ 554 wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); 555 return (recoverable); 556} 557