/* mca.c revision 192440 */
1/*- 2 * Copyright (c) 2009 Advanced Computing Technologies LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28/* 29 * Support for x86 machine check architecture. 
30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: head/sys/i386/i386/mca.c 192440 2009-05-20 16:11:22Z jhb $"); 34 35#include <sys/param.h> 36#include <sys/kernel.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/sched.h> 42#include <sys/smp.h> 43#include <sys/sysctl.h> 44#include <sys/systm.h> 45#include <sys/taskqueue.h> 46#include <machine/mca.h> 47#include <machine/md_var.h> 48#include <machine/specialreg.h> 49 50struct mca_internal { 51 struct mca_record rec; 52 int logged; 53 STAILQ_ENTRY(mca_internal) link; 54}; 55 56static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 57 58static int mca_count; /* Number of records stored. */ 59 60SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); 61 62static int mca_enabled = 0; 63TUNABLE_INT("hw.mca.enabled", &mca_enabled); 64SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, 65 "Administrative toggle for machine check support"); 66 67static STAILQ_HEAD(, mca_internal) mca_records; 68static struct callout mca_timer; 69static int mca_ticks = 3600; /* Check hourly by default. 
*/ 70static struct task mca_task; 71static struct mtx mca_lock; 72 73static int 74sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) 75{ 76 int error, value; 77 78 value = mca_ticks; 79 error = sysctl_handle_int(oidp, &value, 0, req); 80 if (error || req->newptr == NULL) 81 return (error); 82 if (value <= 0) 83 return (EINVAL); 84 mca_ticks = value; 85 return (0); 86} 87 88static int 89sysctl_mca_records(SYSCTL_HANDLER_ARGS) 90{ 91 int *name = (int *)arg1; 92 u_int namelen = arg2; 93 struct mca_record record; 94 struct mca_internal *rec; 95 int i; 96 97 if (namelen != 1) 98 return (EINVAL); 99 100 if (name[0] < 0 || name[0] >= mca_count) 101 return (EINVAL); 102 103 mtx_lock_spin(&mca_lock); 104 if (name[0] >= mca_count) { 105 mtx_unlock_spin(&mca_lock); 106 return (EINVAL); 107 } 108 i = 0; 109 STAILQ_FOREACH(rec, &mca_records, link) { 110 if (i == name[0]) { 111 record = rec->rec; 112 break; 113 } 114 i++; 115 } 116 mtx_unlock_spin(&mca_lock); 117 return (SYSCTL_OUT(req, &record, sizeof(record))); 118} 119 120static struct mca_record * 121mca_record_entry(int bank) 122{ 123 struct mca_internal *rec; 124 uint64_t status; 125 u_int p[4]; 126 127 status = rdmsr(MSR_MC_STATUS(bank)); 128 if (!(status & MC_STATUS_VAL)) 129 return (NULL); 130 131 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO); 132 if (rec == NULL) { 133 printf("MCA: Unable to allocate space for an event.\n"); 134 return (NULL); 135 } 136 137 /* Save exception information. */ 138 rec->rec.mr_status = status; 139 if (status & MC_STATUS_ADDRV) 140 rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank)); 141 if (status & MC_STATUS_MISCV) 142 rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank)); 143 rec->rec.mr_tsc = rdtsc(); 144 rec->rec.mr_apic_id = PCPU_GET(apic_id); 145 146 /* 147 * Clear machine check. Don't do this for uncorrectable 148 * errors so that the BIOS can see them. 
149 */ 150 if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { 151 wrmsr(MSR_MC_STATUS(bank), 0); 152 do_cpuid(0, p); 153 } 154 155 mtx_lock_spin(&mca_lock); 156 STAILQ_INSERT_TAIL(&mca_records, rec, link); 157 mca_count++; 158 mtx_unlock_spin(&mca_lock); 159 return (&rec->rec); 160} 161 162static const char * 163mca_error_ttype(uint16_t mca_error) 164{ 165 166 switch ((mca_error & 0x000c) >> 2) { 167 case 0: 168 return ("I"); 169 case 1: 170 return ("D"); 171 case 2: 172 return ("G"); 173 } 174 return ("?"); 175} 176 177static const char * 178mca_error_level(uint16_t mca_error) 179{ 180 181 switch (mca_error & 0x0003) { 182 case 0: 183 return ("L0"); 184 case 1: 185 return ("L1"); 186 case 2: 187 return ("L2"); 188 case 3: 189 return ("LG"); 190 } 191 return ("L?"); 192} 193 194static const char * 195mca_error_request(uint16_t mca_error) 196{ 197 198 switch ((mca_error & 0x00f0) >> 4) { 199 case 0x0: 200 return ("ERR"); 201 case 0x1: 202 return ("RD"); 203 case 0x2: 204 return ("WR"); 205 case 0x3: 206 return ("DRD"); 207 case 0x4: 208 return ("DWR"); 209 case 0x5: 210 return ("IRD"); 211 case 0x6: 212 return ("PREFETCH"); 213 case 0x7: 214 return ("EVICT"); 215 case 0x8: 216 return ("SNOOP"); 217 } 218 return ("???"); 219} 220 221/* Dump details about a single machine check. */ 222static void 223mca_log(struct mca_record *rec) 224{ 225 uint16_t mca_error; 226 227 printf("MCA: CPU %d ", rec->mr_apic_id); 228 if (rec->mr_status & MC_STATUS_UC) 229 printf("UNCOR "); 230 else 231 printf("COR "); 232 if (rec->mr_status & MC_STATUS_PCC) 233 printf("PCC "); 234 if (rec->mr_status & MC_STATUS_OVER) 235 printf("OVER "); 236 mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; 237 switch (mca_error) { 238 /* Simple error codes. 
*/ 239 case 0x0000: 240 printf("no error"); 241 break; 242 case 0x0001: 243 printf("unclassified error"); 244 break; 245 case 0x0002: 246 printf("ucode ROM parity error"); 247 break; 248 case 0x0003: 249 printf("external error"); 250 break; 251 case 0x0004: 252 printf("FRC error"); 253 break; 254 case 0x0400: 255 printf("internal timer error"); 256 break; 257 default: 258 if ((mca_error & 0xfc00) == 0x0400) { 259 printf("internal error %x", mca_error & 0x03ff); 260 break; 261 } 262 263 /* Compound error codes. */ 264 265 /* Memory hierarchy error. */ 266 if ((mca_error & 0xeffc) == 0x000c) { 267 printf("%s memory error", mca_error_level(mca_error)); 268 break; 269 } 270 271 /* TLB error. */ 272 if ((mca_error & 0xeff0) == 0x0010) { 273 printf("%sTLB %s error", mca_error_ttype(mca_error), 274 mca_error_level(mca_error)); 275 break; 276 } 277 278 /* Cache error. */ 279 if ((mca_error & 0xef00) == 0x0100) { 280 printf("%sCACHE %s %s error", 281 mca_error_ttype(mca_error), 282 mca_error_level(mca_error), 283 mca_error_request(mca_error)); 284 break; 285 } 286 287 /* Bus and/or Interconnect error. 
*/ 288 if ((mca_error & 0xe800) == 0x0800) { 289 printf("BUS%s ", mca_error_level(mca_error)); 290 switch ((mca_error & 0x0600) >> 9) { 291 case 0: 292 printf("Source"); 293 break; 294 case 1: 295 printf("Responder"); 296 break; 297 case 2: 298 printf("Observer"); 299 break; 300 default: 301 printf("???"); 302 break; 303 } 304 printf(" %s ", mca_error_request(mca_error)); 305 switch ((mca_error & 0x000c) >> 2) { 306 case 0: 307 printf("Memory"); 308 break; 309 case 2: 310 printf("I/O"); 311 break; 312 case 3: 313 printf("Other"); 314 break; 315 default: 316 printf("???"); 317 break; 318 } 319 if (mca_error & 0x0100) 320 printf(" timed out"); 321 break; 322 } 323 324 printf("unknown error %x", mca_error); 325 break; 326 } 327 printf("\n"); 328 if (rec->mr_status & MC_STATUS_ADDRV) 329 printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); 330} 331 332/* 333 * This scans all the machine check banks of the current CPU to see if 334 * there are any machine checks. Any non-recoverable errors are 335 * reported immediately via mca_log(). The current thread must be 336 * pinned when this is called. The 'mcip' parameter indicates if we 337 * are being called from the MC exception handler. In that case this 338 * function returns true if the system is restartable. Otherwise, it 339 * returns a count of the number of valid MC records found. 340 */ 341static int 342mca_scan(int mcip) 343{ 344 struct mca_record *rec; 345 uint64_t mcg_cap, ucmask; 346 int count, i, recoverable; 347 348 count = 0; 349 recoverable = 1; 350 ucmask = MC_STATUS_UC | MC_STATUS_PCC; 351 352 /* When handling a MCE#, treat the OVER flag as non-restartable. */ 353 if (mcip) 354 ucmask |= MC_STATUS_OVER; 355 mcg_cap = rdmsr(MSR_MCG_CAP); 356 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 357 rec = mca_record_entry(i); 358 if (rec != NULL) { 359 count++; 360 if (rec->mr_status & ucmask) { 361 recoverable = 0; 362 mca_log(rec); 363 } 364 } 365 } 366 return (mcip ? 
recoverable : count); 367} 368 369/* 370 * Scan the machine check banks on all CPUs by binding to each CPU in 371 * turn. If any of the CPUs contained new machine check records, log 372 * them to the console. 373 */ 374static void 375mca_scan_cpus(void *context, int pending) 376{ 377 struct mca_internal *mca; 378 struct thread *td; 379 int count, cpu; 380 381 td = curthread; 382 count = 0; 383 thread_lock(td); 384 for (cpu = 0; cpu <= mp_maxid; cpu++) { 385 if (CPU_ABSENT(cpu)) 386 continue; 387 sched_bind(td, cpu); 388 thread_unlock(td); 389 count += mca_scan(0); 390 thread_lock(td); 391 sched_unbind(td); 392 } 393 thread_unlock(td); 394 if (count != 0) { 395 mtx_lock_spin(&mca_lock); 396 STAILQ_FOREACH(mca, &mca_records, link) { 397 if (!mca->logged) { 398 mca->logged = 1; 399 mtx_unlock_spin(&mca_lock); 400 mca_log(&mca->rec); 401 mtx_lock_spin(&mca_lock); 402 } 403 } 404 mtx_unlock_spin(&mca_lock); 405 } 406} 407 408static void 409mca_periodic_scan(void *arg) 410{ 411 412 taskqueue_enqueue(taskqueue_thread, &mca_task); 413 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); 414} 415 416static int 417sysctl_mca_scan(SYSCTL_HANDLER_ARGS) 418{ 419 int error, i; 420 421 i = 0; 422 error = sysctl_handle_int(oidp, &i, 0, req); 423 if (error) 424 return (error); 425 if (i) 426 taskqueue_enqueue(taskqueue_thread, &mca_task); 427 return (0); 428} 429 430static void 431mca_startup(void *dummy) 432{ 433 434 if (!mca_enabled || !(cpu_feature & CPUID_MCA)) 435 return; 436 437 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, 438 NULL); 439} 440SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL); 441 442static void 443mca_setup(void) 444{ 445 446 mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); 447 STAILQ_INIT(&mca_records); 448 TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); 449 callout_init(&mca_timer, CALLOUT_MPSAFE); 450 SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 451 "count", CTLFLAG_RD, &mca_count, 0, "Record 
count"); 452 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 453 "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 454 0, sysctl_mca_ticks, "I", 455 "Periodic interval in seconds to scan for machine checks"); 456 SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 457 "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records"); 458 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, 459 "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 460 sysctl_mca_scan, "I", "Force an immediate scan for machine checks"); 461} 462 463/* Must be executed on each CPU. */ 464void 465mca_init(void) 466{ 467 uint64_t mcg_cap; 468 int i; 469 470 /* MCE is required. */ 471 if (!mca_enabled || !(cpu_feature & CPUID_MCE)) 472 return; 473 474 if (cpu_feature & CPUID_MCA) { 475 if (PCPU_GET(cpuid) == 0) 476 mca_setup(); 477 478 sched_pin(); 479 mcg_cap = rdmsr(MSR_MCG_CAP); 480 if (mcg_cap & MCG_CAP_CTL_P) 481 /* Enable MCA features. */ 482 wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); 483 484 for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { 485 /* 486 * Enable logging of all errors. For P6 487 * processors, MC0_CTL is always enabled. 488 * 489 * XXX: Better CPU test needed here? 490 */ 491 if (!(i == 0 && (cpu_id & 0xf00) == 0x600)) 492 wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL); 493 494 /* Clear all errors. */ 495 wrmsr(MSR_MC_STATUS(i), 0); 496 } 497 sched_unpin(); 498 } 499 500 load_cr4(rcr4() | CR4_MCE); 501} 502 503/* Called when a machine check exception fires. */ 504int 505mca_intr(void) 506{ 507 uint64_t mcg_status; 508 int recoverable; 509 510 if (!(cpu_feature & CPUID_MCA)) { 511 /* 512 * Just print the values of the old Pentium registers 513 * and panic. 514 */ 515 printf("MC Type: 0x%llx Address: 0x%llx\n", 516 rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); 517 return (0); 518 } 519 520 /* Scan the banks and check for any non-recoverable errors. 
*/ 521 recoverable = mca_scan(1); 522 mcg_status = rdmsr(MSR_MCG_STATUS); 523 if (!(mcg_status & MCG_STATUS_RIPV)) 524 recoverable = 0; 525 526 /* Clear MCIP. */ 527 wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); 528 return (recoverable); 529} 530