/* mca.c revision 200033 */
1/*- 2 * Copyright (c) 2009 Advanced Computing Technologies LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* 29 * Support for x86 machine check architecture. 
30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/i386/i386/mca.c 200033 2009-12-02 15:45:55Z avg $"); 34 35#include <sys/param.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/sched.h> 42#include <sys/smp.h> 43#include <sys/sysctl.h> 44#include <sys/systm.h> 45#include <sys/taskqueue.h> 46#include <machine/mca.h> 47#include <machine/md_var.h> 48#include <machine/specialreg.h> 49 50struct mca_internal { 51 struct mca_record rec; 52 int logged; 53 STAILQ_ENTRY(mca_internal) link; 54}; 55 56static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 57 58static int mca_count; /* Number of records stored. */ 59 60SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); 61 62static int mca_enabled = 0; 63TUNABLE_INT("hw.mca.enabled", &mca_enabled); 64SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, 65 "Administrative toggle for machine check support"); 66 67static STAILQ_HEAD(, mca_internal) mca_records; 68static struct callout mca_timer; 69static int mca_ticks = 3600; /* Check hourly by default. 
*/ 70static struct task mca_task; 71static struct mtx mca_lock; 72 73static int 74sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) 75{ 76 int error, value; 77 78 value = mca_ticks; 79 error = sysctl_handle_int(oidp, &value, 0, req); 80 if (error || req->newptr == NULL) 81 return (error); 82 if (value <= 0) 83 return (EINVAL); 84 mca_ticks = value; 85 return (0); 86} 87 88static int 89sysctl_mca_records(SYSCTL_HANDLER_ARGS) 90{ 91 int *name = (int *)arg1; 92 u_int namelen = arg2; 93 struct mca_record record; 94 struct mca_internal *rec; 95 int i; 96 97 if (namelen != 1) 98 return (EINVAL); 99 100 if (name[0] < 0 || name[0] >= mca_count) 101 return (EINVAL); 102 103 mtx_lock_spin(&mca_lock); 104 if (name[0] >= mca_count) { 105 mtx_unlock_spin(&mca_lock); 106 return (EINVAL); 107 } 108 i = 0; 109 STAILQ_FOREACH(rec, &mca_records, link) { 110 if (i == name[0]) { 111 record = rec->rec; 112 break; 113 } 114 i++; 115 } 116 mtx_unlock_spin(&mca_lock); 117 return (SYSCTL_OUT(req, &record, sizeof(record))); 118} 119 120static const char * 121mca_error_ttype(uint16_t mca_error) 122{ 123 124 switch ((mca_error & 0x000c) >> 2) { 125 case 0: 126 return ("I"); 127 case 1: 128 return ("D"); 129 case 2: 130 return ("G"); 131 } 132 return ("?"); 133} 134 135static const char * 136mca_error_level(uint16_t mca_error) 137{ 138 139 switch (mca_error & 0x0003) { 140 case 0: 141 return ("L0"); 142 case 1: 143 return ("L1"); 144 case 2: 145 return ("L2"); 146 case 3: 147 return ("LG"); 148 } 149 return ("L?"); 150} 151 152static const char * 153mca_error_request(uint16_t mca_error) 154{ 155 156 switch ((mca_error & 0x00f0) >> 4) { 157 case 0x0: 158 return ("ERR"); 159 case 0x1: 160 return ("RD"); 161 case 0x2: 162 return ("WR"); 163 case 0x3: 164 return ("DRD"); 165 case 0x4: 166 return ("DWR"); 167 case 0x5: 168 return ("IRD"); 169 case 0x6: 170 return ("PREFETCH"); 171 case 0x7: 172 return ("EVICT"); 173 case 0x8: 174 return ("SNOOP"); 175 } 176 return ("???"); 177} 178 179/* Dump details about a 
single machine check. */ 180static void __nonnull(1) 181mca_log(const struct mca_record *rec) 182{ 183 uint16_t mca_error; 184 185 printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank, 186 (long long)rec->mr_status); 187 printf("MCA: CPU %d ", rec->mr_apic_id); 188 if (rec->mr_status & MC_STATUS_UC) 189 printf("UNCOR "); 190 else 191 printf("COR "); 192 if (rec->mr_status & MC_STATUS_PCC) 193 printf("PCC "); 194 if (rec->mr_status & MC_STATUS_OVER) 195 printf("OVER "); 196 mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; 197 switch (mca_error) { 198 /* Simple error codes. */ 199 case 0x0000: 200 printf("no error"); 201 break; 202 case 0x0001: 203 printf("unclassified error"); 204 break; 205 case 0x0002: 206 printf("ucode ROM parity error"); 207 break; 208 case 0x0003: 209 printf("external error"); 210 break; 211 case 0x0004: 212 printf("FRC error"); 213 break; 214 case 0x0400: 215 printf("internal timer error"); 216 break; 217 default: 218 if ((mca_error & 0xfc00) == 0x0400) { 219 printf("internal error %x", mca_error & 0x03ff); 220 break; 221 } 222 223 /* Compound error codes. */ 224 225 /* Memory hierarchy error. */ 226 if ((mca_error & 0xeffc) == 0x000c) { 227 printf("%s memory error", mca_error_level(mca_error)); 228 break; 229 } 230 231 /* TLB error. */ 232 if ((mca_error & 0xeff0) == 0x0010) { 233 printf("%sTLB %s error", mca_error_ttype(mca_error), 234 mca_error_level(mca_error)); 235 break; 236 } 237 238 /* Cache error. */ 239 if ((mca_error & 0xef00) == 0x0100) { 240 printf("%sCACHE %s %s error", 241 mca_error_ttype(mca_error), 242 mca_error_level(mca_error), 243 mca_error_request(mca_error)); 244 break; 245 } 246 247 /* Bus and/or Interconnect error. 
*/ 248 if ((mca_error & 0xe800) == 0x0800) { 249 printf("BUS%s ", mca_error_level(mca_error)); 250 switch ((mca_error & 0x0600) >> 9) { 251 case 0: 252 printf("Source"); 253 break; 254 case 1: 255 printf("Responder"); 256 break; 257 case 2: 258 printf("Observer"); 259 break; 260 default: 261 printf("???"); 262 break; 263 } 264 printf(" %s ", mca_error_request(mca_error)); 265 switch ((mca_error & 0x000c) >> 2) { 266 case 0: 267 printf("Memory"); 268 break; 269 case 2: 270 printf("I/O"); 271 break; 272 case 3: 273 printf("Other"); 274 break; 275 default: 276 printf("???"); 277 break; 278 } 279 if (mca_error & 0x0100) 280 printf(" timed out"); 281 break; 282 } 283 284 printf("unknown error %x", mca_error); 285 break; 286 } 287 printf("\n"); 288 if (rec->mr_status & MC_STATUS_ADDRV) 289 printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); 290} 291 292static int __nonnull(2) 293mca_check_status(int bank, struct mca_record *rec) 294{ 295 uint64_t status; 296 u_int p[4]; 297 298 status = rdmsr(MSR_MC_STATUS(bank)); 299 if (!(status & MC_STATUS_VAL)) 300 return (0); 301 302 /* Save exception information. */ 303 rec->mr_status = status; 304 rec->mr_bank = bank; 305 rec->mr_addr = 0; 306 if (status & MC_STATUS_ADDRV) 307 rec->mr_addr = rdmsr(MSR_MC_ADDR(bank)); 308 rec->mr_misc = 0; 309 if (status & MC_STATUS_MISCV) 310 rec->mr_misc = rdmsr(MSR_MC_MISC(bank)); 311 rec->mr_tsc = rdtsc(); 312 rec->mr_apic_id = PCPU_GET(apic_id); 313 314 /* 315 * Clear machine check. Don't do this for uncorrectable 316 * errors so that the BIOS can see them. 
317 */ 318 if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { 319 wrmsr(MSR_MC_STATUS(bank), 0); 320 do_cpuid(0, p); 321 } 322 return (1); 323} 324 325static void __nonnull(1) 326mca_record_entry(const struct mca_record *record) 327{ 328 struct mca_internal *rec; 329 330 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT); 331 if (rec == NULL) { 332 printf("MCA: Unable to allocate space for an event.\n"); 333 mca_log(record); 334 return; 335 } 336 337 rec->rec = *record; 338 rec->logged = 0; 339 mtx_lock_spin(&mca_lock); 340 STAILQ_INSERT_TAIL(&mca_records, rec, link); 341 mca_count++; 342 mtx_unlock_spin(&mca_lock); 343} 344 345/* 346 * This scans all the machine check banks of the current CPU to see if 347 * there are any machine checks. Any non-recoverable errors are 348 * reported immediately via mca_log(). The current thread must be 349 * pinned when this is called. The 'mcip' parameter indicates if we 350 * are being called from the MC exception handler. In that case this 351 * function returns true if the system is restartable. Otherwise, it 352 * returns a count of the number of valid MC records found. 353 */ 354static int 355mca_scan(int mcip) 356{ 357 struct mca_record rec; 358 uint64_t mcg_cap, ucmask; 359 int count, i, recoverable; 360 361 count = 0; 362 recoverable = 1; 363 ucmask = MC_STATUS_UC | MC_STATUS_PCC; 364 365 /* When handling a MCE#, treat the OVER flag as non-restartable. */ 366 if (mcip) 367 ucmask |= MC_STATUS_OVER; 368 mcg_cap = rdmsr(MSR_MCG_CAP); 369 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 370 if (mca_check_status(i, &rec)) { 371 count++; 372 if (rec.mr_status & ucmask) { 373 recoverable = 0; 374 mca_log(&rec); 375 } 376 mca_record_entry(&rec); 377 } 378 } 379 return (mcip ? recoverable : count); 380} 381 382/* 383 * Scan the machine check banks on all CPUs by binding to each CPU in 384 * turn. If any of the CPUs contained new machine check records, log 385 * them to the console. 
386 */ 387static void 388mca_scan_cpus(void *context, int pending) 389{ 390 struct mca_internal *mca; 391 struct thread *td; 392 int count, cpu; 393 394 td = curthread; 395 count = 0; 396 thread_lock(td); 397 for (cpu = 0; cpu <= mp_maxid; cpu++) { 398 if (CPU_ABSENT(cpu)) 399 continue; 400 sched_bind(td, cpu); 401 thread_unlock(td); 402 count += mca_scan(0); 403 thread_lock(td); 404 sched_unbind(td); 405 } 406 thread_unlock(td); 407 if (count != 0) { 408 mtx_lock_spin(&mca_lock); 409 STAILQ_FOREACH(mca, &mca_records, link) { 410 if (!mca->logged) { 411 mca->logged = 1; 412 mtx_unlock_spin(&mca_lock); 413 mca_log(&mca->rec); 414 mtx_lock_spin(&mca_lock); 415 } 416 } 417 mtx_unlock_spin(&mca_lock); 418 } 419} 420 421static void 422mca_periodic_scan(void *arg) 423{ 424 425 taskqueue_enqueue(taskqueue_thread, &mca_task); 426 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); 427} 428 429static int 430sysctl_mca_scan(SYSCTL_HANDLER_ARGS) 431{ 432 int error, i; 433 434 i = 0; 435 error = sysctl_handle_int(oidp, &i, 0, req); 436 if (error) 437 return (error); 438 if (i) 439 taskqueue_enqueue(taskqueue_thread, &mca_task); 440 return (0); 441} 442 443static void 444mca_startup(void *dummy) 445{ 446 447 if (!mca_enabled || !(cpu_feature & CPUID_MCA)) 448 return; 449 450 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, 451 NULL); 452} 453SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); 454 455static void 456mca_setup(void) 457{ 458 459 mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); 460 STAILQ_INIT(&mca_records); 461 TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); 462 callout_init(&mca_timer, CALLOUT_MPSAFE); 463 SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 464 "count", CTLFLAG_RD, &mca_count, 0, "Record count"); 465 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 466 "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 467 0, sysctl_mca_ticks, "I", 468 "Periodic interval in 
seconds to scan for machine checks"); 469 SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 470 "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); 471 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 472 "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 473 sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); 474} 475 476/* Must be executed on each CPU. */ 477void 478mca_init(void) 479{ 480 uint64_t mcg_cap; 481 int i; 482 483 /* MCE is required. */ 484 if (!mca_enabled || !(cpu_feature & CPUID_MCE)) 485 return; 486 487 if (cpu_feature & CPUID_MCA) { 488 if (PCPU_GET(cpuid) == 0) 489 mca_setup(); 490 491 sched_pin(); 492 mcg_cap = rdmsr(MSR_MCG_CAP); 493 if (mcg_cap & MCG_CAP_CTL_P) 494 /* Enable MCA features. */ 495 wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); 496 497 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 498 /* 499 * Enable logging of all errors. For P6 500 * processors, MC0_CTL is always enabled. 501 * 502 * XXX: Better CPU test needed here? 503 */ 504 if (!(i == 0 && (cpu_id & 0xf00) == 0x600)) 505 wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL); 506 507 /* Clear all errors. */ 508 wrmsr(MSR_MC_STATUS(i), 0); 509 } 510 sched_unpin(); 511 } 512 513 load_cr4(rcr4() | CR4_MCE); 514} 515 516/* Called when a machine check exception fires. */ 517int 518mca_intr(void) 519{ 520 uint64_t mcg_status; 521 int recoverable; 522 523 if (!(cpu_feature & CPUID_MCA)) { 524 /* 525 * Just print the values of the old Pentium registers 526 * and panic. 527 */ 528 printf("MC Type: 0x%llx Address: 0x%llx\n", 529 rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); 530 return (0); 531 } 532 533 /* Scan the banks and check for any non-recoverable errors. */ 534 recoverable = mca_scan(1); 535 mcg_status = rdmsr(MSR_MCG_STATUS); 536 if (!(mcg_status & MCG_STATUS_RIPV)) 537 recoverable = 0; 538 539 /* Clear MCIP. */ 540 wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); 541 return (recoverable); 542} 543