mca.c revision 192050
1/*- 2 * Copyright (c) 2009 Advanced Computing Technologies LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* 29 * Support for x86 machine check architecture. 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/i386/i386/mca.c 192050 2009-05-13 17:53:04Z jhb $"); 34 35#include <sys/param.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/sched.h> 42#include <sys/smp.h> 43#include <sys/sysctl.h> 44#include <sys/systm.h> 45#include <sys/taskqueue.h> 46#include <machine/mca.h> 47#include <machine/md_var.h> 48#include <machine/specialreg.h> 49 50struct mca_internal { 51 struct mca_record rec; 52 int logged; 53 STAILQ_ENTRY(mca_internal) link; 54}; 55 56static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 57 58static struct sysctl_oid *mca_sysctl_tree; 59 60static int mca_count; /* Number of records stored. */ 61 62static STAILQ_HEAD(, mca_internal) mca_records; 63static struct callout mca_timer; 64static int mca_ticks = 3600; /* Check hourly by default. */ 65static struct task mca_task; 66static struct mtx mca_lock; 67 68static int 69sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) 70{ 71 int error, value; 72 73 value = mca_ticks; 74 error = sysctl_handle_int(oidp, &value, 0, req); 75 if (error || req->newptr == NULL) 76 return (error); 77 if (value <= 0) 78 return (EINVAL); 79 mca_ticks = value; 80 return (0); 81} 82 83static int 84sysctl_mca_records(SYSCTL_HANDLER_ARGS) 85{ 86 int *name = (int *)arg1; 87 u_int namelen = arg2; 88 struct mca_record record; 89 struct mca_internal *rec; 90 int i; 91 92 if (namelen != 1) 93 return (EINVAL); 94 95 if (name[0] < 0 || name[0] >= mca_count) 96 return (EINVAL); 97 98 mtx_lock_spin(&mca_lock); 99 if (name[0] >= mca_count) { 100 mtx_unlock_spin(&mca_lock); 101 return (EINVAL); 102 } 103 i = 0; 104 STAILQ_FOREACH(rec, &mca_records, link) { 105 if (i == name[0]) { 106 record = rec->rec; 107 break; 108 } 109 i++; 110 } 111 mtx_unlock_spin(&mca_lock); 112 return (SYSCTL_OUT(req, &record, sizeof(record))); 113} 114 115static struct mca_record * 116mca_record_entry(int bank) 117{ 118 struct mca_internal *rec; 119 uint64_t status; 120 u_int p[4]; 121 122 status = rdmsr(MSR_MC_STATUS(bank)); 123 if (!(status & MC_STATUS_VAL)) 124 return (NULL); 125 126 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO); 127 if (rec == NULL) { 128 printf("MCA: Unable to allocate space for an event.\n"); 129 return (NULL); 130 } 131 132 /* Save exception information. */ 133 rec->rec.mr_status = status; 134 if (status & MC_STATUS_ADDRV) 135 rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank)); 136 if (status & MC_STATUS_MISCV) 137 rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank)); 138 rec->rec.mr_tsc = rdtsc(); 139 rec->rec.mr_apic_id = PCPU_GET(apic_id); 140 141 /* 142 * Clear machine check. Don't do this for uncorrectable 143 * errors so that the BIOS can see them. 144 */ 145 if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { 146 wrmsr(MSR_MC_STATUS(bank), 0); 147 do_cpuid(0, p); 148 } 149 150 mtx_lock_spin(&mca_lock); 151 STAILQ_INSERT_TAIL(&mca_records, rec, link); 152 mca_count++; 153 mtx_unlock_spin(&mca_lock); 154 return (&rec->rec); 155} 156 157static const char * 158mca_error_ttype(uint16_t mca_error) 159{ 160 161 switch ((mca_error & 0x000c) >> 2) { 162 case 0: 163 return ("I"); 164 case 1: 165 return ("D"); 166 case 2: 167 return ("G"); 168 } 169 return ("?"); 170} 171 172static const char * 173mca_error_level(uint16_t mca_error) 174{ 175 176 switch (mca_error & 0x0003) { 177 case 0: 178 return ("L0"); 179 case 1: 180 return ("L1"); 181 case 2: 182 return ("L2"); 183 case 3: 184 return ("LG"); 185 } 186 return ("L?"); 187} 188 189static const char * 190mca_error_request(uint16_t mca_error) 191{ 192 193 switch ((mca_error & 0x00f0) >> 4) { 194 case 0x0: 195 return ("ERR"); 196 case 0x1: 197 return ("RD"); 198 case 0x2: 199 return ("WR"); 200 case 0x3: 201 return ("DRD"); 202 case 0x4: 203 return ("DWR"); 204 case 0x5: 205 return ("IRD"); 206 case 0x6: 207 return ("PREFETCH"); 208 case 0x7: 209 return ("EVICT"); 210 case 0x8: 211 return ("SNOOP"); 212 } 213 return ("???"); 214} 215 216/* Dump details about a single machine check. */ 217static void 218mca_log(struct mca_record *rec) 219{ 220 uint16_t mca_error; 221 222 printf("MCA: CPU %d ", rec->mr_apic_id); 223 if (rec->mr_status & MC_STATUS_UC) 224 printf("UNCOR "); 225 else 226 printf("COR "); 227 if (rec->mr_status & MC_STATUS_PCC) 228 printf("PCC "); 229 if (rec->mr_status & MC_STATUS_OVER) 230 printf("OVER "); 231 mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; 232 switch (mca_error) { 233 /* Simple error codes. */ 234 case 0x0000: 235 printf("no error"); 236 break; 237 case 0x0001: 238 printf("unclassified error"); 239 break; 240 case 0x0002: 241 printf("ucode ROM parity error"); 242 break; 243 case 0x0003: 244 printf("external error"); 245 break; 246 case 0x0004: 247 printf("FRC error"); 248 break; 249 case 0x0400: 250 printf("internal timer error"); 251 break; 252 default: 253 if ((mca_error & 0xfc00) == 0x0400) { 254 printf("internal error %x", mca_error & 0x03ff); 255 break; 256 } 257 258 /* Compound error codes. */ 259 260 /* Memory hierarchy error. */ 261 if ((mca_error & 0xeffc) == 0x000c) { 262 printf("%s memory error", mca_error_level(mca_error)); 263 break; 264 } 265 266 /* TLB error. */ 267 if ((mca_error & 0xeff0) == 0x0010) { 268 printf("%sTLB %s error", mca_error_ttype(mca_error), 269 mca_error_level(mca_error)); 270 break; 271 } 272 273 /* Cache error. */ 274 if ((mca_error & 0xef00) == 0x0100) { 275 printf("%sCACHE %s %s error", 276 mca_error_ttype(mca_error), 277 mca_error_level(mca_error), 278 mca_error_request(mca_error)); 279 break; 280 } 281 282 /* Bus and/or Interconnect error. */ 283 if ((mca_error & 0xe800) == 0x0800) { 284 printf("BUS%s ", mca_error_level(mca_error)); 285 switch ((mca_error & 0x0600) >> 9) { 286 case 0: 287 printf("Source"); 288 break; 289 case 1: 290 printf("Responder"); 291 break; 292 case 2: 293 printf("Observer"); 294 break; 295 default: 296 printf("???"); 297 break; 298 } 299 printf(" %s ", mca_error_request(mca_error)); 300 switch ((mca_error & 0x000c) >> 2) { 301 case 0: 302 printf("Memory"); 303 break; 304 case 2: 305 printf("I/O"); 306 break; 307 case 3: 308 printf("Other"); 309 break; 310 default: 311 printf("???"); 312 break; 313 } 314 if (mca_error & 0x0100) 315 printf(" timed out"); 316 break; 317 } 318 319 printf("unknown error %x", mca_error); 320 break; 321 } 322 printf("\n"); 323 if (rec->mr_status & MC_STATUS_ADDRV) 324 printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); 325} 326 327/* 328 * This scans all the machine check banks of the current CPU to see if 329 * there are any machine checks. Any non-recoverable errors are 330 * reported immediately via mca_log(). The current thread must be 331 * pinned when this is called. The 'mcip' parameter indicates if we 332 * are being called from the MC exception handler. In that case this 333 * function returns true if the system is restartable. Otherwise, it 334 * returns a count of the number of valid MC records found. 335 */ 336static int 337mca_scan(int mcip) 338{ 339 struct mca_record *rec; 340 uint64_t mcg_cap, ucmask; 341 int count, i, recoverable; 342 343 count = 0; 344 recoverable = 1; 345 ucmask = MC_STATUS_UC | MC_STATUS_PCC; 346 347 /* When handling a MCE#, treat the OVER flag as non-restartable. */ 348 if (mcip) 349 ucmask = MC_STATUS_OVER; 350 mcg_cap = rdmsr(MSR_MCG_CAP); 351 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 352 rec = mca_record_entry(i); 353 if (rec != NULL) { 354 count++; 355 if (rec->mr_status & ucmask) { 356 recoverable = 0; 357 mca_log(rec); 358 } 359 } 360 } 361 return (mcip ? recoverable : count); 362} 363 364/* 365 * Scan the machine check banks on all CPUs by binding to each CPU in 366 * turn. If any of the CPUs contained new machine check records, log 367 * them to the console. 368 */ 369static void 370mca_scan_cpus(void *context, int pending) 371{ 372 struct mca_internal *mca; 373 struct thread *td; 374 int count, cpu; 375 376 td = curthread; 377 count = 0; 378 thread_lock(td); 379 for (cpu = 0; cpu <= mp_maxid; cpu++) { 380 if (CPU_ABSENT(cpu)) 381 continue; 382 sched_bind(td, cpu); 383 thread_unlock(td); 384 count += mca_scan(0); 385 thread_lock(td); 386 sched_unbind(td); 387 } 388 thread_unlock(td); 389 if (count != 0) { 390 mtx_lock_spin(&mca_lock); 391 STAILQ_FOREACH(mca, &mca_records, link) { 392 if (!mca->logged) { 393 mca->logged = 1; 394 mtx_unlock_spin(&mca_lock); 395 mca_log(&mca->rec); 396 mtx_lock_spin(&mca_lock); 397 } 398 } 399 mtx_unlock_spin(&mca_lock); 400 } 401} 402 403static void 404mca_periodic_scan(void *arg) 405{ 406 407 taskqueue_enqueue(taskqueue_thread, &mca_task); 408 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); 409} 410 411static int 412sysctl_mca_scan(SYSCTL_HANDLER_ARGS) 413{ 414 int error, i; 415 416 i = 0; 417 error = sysctl_handle_int(oidp, &i, 0, req); 418 if (error) 419 return (error); 420 if (i) 421 taskqueue_enqueue(taskqueue_thread, &mca_task); 422 return (0); 423} 424 425static void 426mca_startup(void *dummy) 427{ 428 429 if (!(cpu_feature & CPUID_MCA)) 430 return; 431 432 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, 433 NULL); 434} 435SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); 436 437static void 438mca_setup(void) 439{ 440 441 mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); 442 STAILQ_INIT(&mca_records); 443 TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); 444 callout_init(&mca_timer, CALLOUT_MPSAFE); 445 mca_sysctl_tree = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw), 446 OID_AUTO, "mca", CTLFLAG_RW, NULL, "MCA container"); 447 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO, 448 "count", CTLFLAG_RD, &mca_count, 0, "Record count"); 449 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO, 450 "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 451 0, sysctl_mca_ticks, "I", 452 "Periodic interval in seconds to scan for machine checks"); 453 SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO, 454 "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); 455 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(mca_sysctl_tree), OID_AUTO, 456 "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 457 sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); 458} 459 460/* Must be executed on each CPU. */ 461void 462mca_init(void) 463{ 464 uint64_t mcg_cap; 465 int i; 466 467 /* MCE is required. */ 468 if (!(cpu_feature & CPUID_MCE)) 469 return; 470 471 if (cpu_feature & CPUID_MCA) { 472 if (PCPU_GET(cpuid) == 0) 473 mca_setup(); 474 475 sched_pin(); 476 mcg_cap = rdmsr(MSR_MCG_CAP); 477 if (mcg_cap & MCG_CAP_CTL_P) 478 /* Enable MCA features. */ 479 wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); 480 481 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 482 /* 483 * Enable logging of all errors. For P6 484 * processors, MC0_CTL is always enabled. 485 * 486 * XXX: Better CPU test needed here? 487 */ 488 if (!(i == 0 && (cpu_id & 0xf00) == 0x600)) 489 wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL); 490 491 /* XXX: Better CPU test needed here. */ 492 if ((cpu_id & 0xf00) == 0xf00) 493 mca_record_entry(i); 494 495 /* Clear all errors. */ 496 wrmsr(MSR_MC_STATUS(i), 0); 497 } 498 sched_unpin(); 499 } 500 501 load_cr4(rcr4() | CR4_MCE); 502} 503 504/* Called when a machine check exception fires. */ 505int 506mca_intr(void) 507{ 508 uint64_t mcg_status; 509 int recoverable; 510 511 if (!(cpu_feature & CPUID_MCA)) { 512 /* 513 * Just print the values of the old Pentium registers 514 * and panic. 515 */ 516 printf("MC Type: 0x%llx Address: 0x%llx\n", 517 rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); 518 return (0); 519 } 520 521 /* Scan the banks and check for any non-recoverable errors. */ 522 recoverable = mca_scan(1); 523 mcg_status = rdmsr(MSR_MCG_STATUS); 524 if (!(mcg_status & MCG_STATUS_RIPV)) 525 recoverable = 0; 526 527 /* Clear MCIP. */ 528 wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); 529 return (recoverable); 530} 531