1/* $NetBSD: apei.c,v 1.3 2024/03/26 22:01:03 rillig Exp $ */ 2 3/*- 4 * Copyright (c) 2024 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* 30 * APEI: ACPI Platform Error Interface 31 * 32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html 33 * 34 * XXX dtrace probes 35 * 36 * XXX call _OSC appropriately to announce to the platform that we, the 37 * OSPM, support APEI 38 */ 39 40#include <sys/cdefs.h> 41__KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.3 2024/03/26 22:01:03 rillig Exp $"); 42 43#include <sys/param.h> 44#include <sys/types.h> 45 46#include <sys/atomic.h> 47#include <sys/device.h> 48#include <sys/module.h> 49#include <sys/sysctl.h> 50#include <sys/uuid.h> 51 52#include <dev/acpi/acpireg.h> 53#include <dev/acpi/acpivar.h> 54#include <dev/acpi/apei_bertvar.h> 55#include <dev/acpi/apei_cper.h> 56#include <dev/acpi/apei_einjvar.h> 57#include <dev/acpi/apei_erstvar.h> 58#include <dev/acpi/apei_hestvar.h> 59#include <dev/acpi/apei_interp.h> 60#include <dev/acpi/apeivar.h> 61 62#define _COMPONENT ACPI_RESOURCE_COMPONENT 63ACPI_MODULE_NAME ("apei") 64 65static int apei_match(device_t, cfdata_t, void *); 66static void apei_attach(device_t, device_t, void *); 67static int apei_detach(device_t, int); 68 69static void apei_get_tables(struct apei_tab *); 70static void apei_put_tables(struct apei_tab *); 71 72static void apei_identify(struct apei_softc *, const char *, 73 const ACPI_TABLE_HEADER *); 74 75CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc), 76 apei_match, apei_attach, apei_detach, NULL); 77 78static int 79apei_match(device_t parent, cfdata_t match, void *aux) 80{ 81 struct apei_tab tab; 82 int prio = 0; 83 84 /* 85 * If we have any of the APEI tables, match. 86 */ 87 apei_get_tables(&tab); 88 if (tab.bert || tab.einj || tab.erst || tab.hest) 89 prio = 1; 90 apei_put_tables(&tab); 91 92 return prio; 93} 94 95static void 96apei_attach(device_t parent, device_t self, void *aux) 97{ 98 struct apei_softc *sc = device_private(self); 99 const struct sysctlnode *sysctl_hw_acpi; 100 int error; 101 102 aprint_naive("\n"); 103 aprint_normal(": ACPI Platform Error Interface\n"); 104 105 pmf_device_register(self, NULL, NULL); 106 107 sc->sc_dev = self; 108 apei_get_tables(&sc->sc_tab); 109 110 /* 111 * Get the sysctl hw.acpi node. This should already be created 112 * but I don't see an easy way to get at it. If this fails, 113 * something is seriously wrong, so let's stop here. 114 */ 115 error = sysctl_createv(&sc->sc_sysctllog, 0, 116 NULL, &sysctl_hw_acpi, 0, 117 CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0, 118 CTL_HW, CTL_CREATE, CTL_EOL); 119 if (error) { 120 aprint_error_dev(sc->sc_dev, 121 "failed to create sysctl hw.acpi: %d\n", error); 122 return; 123 } 124 125 /* 126 * Create sysctl hw.acpi.apei. 127 */ 128 error = sysctl_createv(&sc->sc_sysctllog, 0, 129 &sysctl_hw_acpi, &sc->sc_sysctlroot, 0, 130 CTLTYPE_NODE, "apei", 131 SYSCTL_DESCR("ACPI Platform Error Interface"), 132 NULL, 0, NULL, 0, 133 CTL_CREATE, CTL_EOL); 134 if (error) { 135 aprint_error_dev(sc->sc_dev, 136 "failed to create sysctl hw.acpi.apei: %d\n", error); 137 return; 138 } 139 140 /* 141 * Set up BERT, EINJ, ERST, and HEST. 142 */ 143 if (sc->sc_tab.bert) { 144 apei_identify(sc, "BERT", &sc->sc_tab.bert->Header); 145 apei_bert_attach(sc); 146 } 147 if (sc->sc_tab.einj) { 148 apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header); 149 apei_einj_attach(sc); 150 } 151 if (sc->sc_tab.erst) { 152 apei_identify(sc, "ERST", &sc->sc_tab.erst->Header); 153 apei_erst_attach(sc); 154 } 155 if (sc->sc_tab.hest) { 156 apei_identify(sc, "HEST", &sc->sc_tab.hest->Header); 157 apei_hest_attach(sc); 158 } 159} 160 161static int 162apei_detach(device_t self, int flags) 163{ 164 struct apei_softc *sc = device_private(self); 165 int error; 166 167 /* 168 * Detach children. We don't currently have any but this is 169 * harmless without children and mandatory if we ever sprouted 170 * them, so let's just leave it here for good measure. 171 * 172 * After this point, we are committed to detaching; failure is 173 * forbidden. 174 */ 175 error = config_detach_children(self, flags); 176 if (error) 177 return error; 178 179 /* 180 * Tear down all the sysctl nodes first, before the software 181 * state backing them goes away. 182 */ 183 sysctl_teardown(&sc->sc_sysctllog); 184 sc->sc_sysctlroot = NULL; 185 186 /* 187 * Detach the software state for the APEI tables. 188 */ 189 if (sc->sc_tab.hest) 190 apei_hest_detach(sc); 191 if (sc->sc_tab.erst) 192 apei_erst_detach(sc); 193 if (sc->sc_tab.einj) 194 apei_einj_detach(sc); 195 if (sc->sc_tab.bert) 196 apei_bert_detach(sc); 197 198 /* 199 * Release the APEI tables and we're done. 200 */ 201 apei_put_tables(&sc->sc_tab); 202 pmf_device_deregister(self); 203 return 0; 204} 205 206/* 207 * apei_get_tables(tab) 208 * 209 * Get references to whichever APEI-related tables -- BERT, EINJ, 210 * ERST, HEST -- are available in the system. 211 */ 212static void 213apei_get_tables(struct apei_tab *tab) 214{ 215 ACPI_STATUS rv; 216 217 /* 218 * Probe the BERT -- Boot Error Record Table. 219 */ 220 rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert); 221 if (ACPI_FAILURE(rv)) 222 tab->bert = NULL; 223 224 /* 225 * Probe the EINJ -- Error Injection Table. 226 */ 227 rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj); 228 if (ACPI_FAILURE(rv)) 229 tab->einj = NULL; 230 231 /* 232 * Probe the ERST -- Error Record Serialization Table. 233 */ 234 rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst); 235 if (ACPI_FAILURE(rv)) 236 tab->erst = NULL; 237 238 /* 239 * Probe the HEST -- Hardware Error Source Table. 240 */ 241 rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest); 242 if (ACPI_FAILURE(rv)) 243 tab->hest = NULL; 244} 245 246/* 247 * apei_put_tables(tab) 248 * 249 * Release the tables acquired by apei_get_tables. 250 */ 251static void 252apei_put_tables(struct apei_tab *tab) 253{ 254 255 if (tab->bert != NULL) { 256 AcpiPutTable(&tab->bert->Header); 257 tab->bert = NULL; 258 } 259 if (tab->einj != NULL) { 260 AcpiPutTable(&tab->einj->Header); 261 tab->einj = NULL; 262 } 263 if (tab->erst != NULL) { 264 AcpiPutTable(&tab->erst->Header); 265 tab->erst = NULL; 266 } 267 if (tab->hest != NULL) { 268 AcpiPutTable(&tab->hest->Header); 269 tab->hest = NULL; 270 } 271} 272 273/* 274 * apei_identify(sc, name, header) 275 * 276 * Identify the APEI-related table header for dmesg. 277 */ 278static void 279apei_identify(struct apei_softc *sc, const char *name, 280 const ACPI_TABLE_HEADER *h) 281{ 282 283 aprint_normal_dev(sc->sc_dev, "%s:" 284 " OemId <%6.6s,%8.8s,%08x>" 285 " AslId <%4.4s,%08x>\n", 286 name, 287 h->OemId, h->OemTableId, h->OemRevision, 288 h->AslCompilerId, h->AslCompilerRevision); 289} 290 291/* 292 * apei_cper_guid_dec(buf, uuid) 293 * 294 * Decode a Common Platform Error Record UUID/GUID from an ACPI 295 * table at buf into a sys/uuid.h struct uuid. 296 */ 297static void 298apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid) 299{ 300 301 uuid_dec_le(buf, uuid); 302} 303 304/* 305 * apei_format_guid(uuid, s) 306 * 307 * Format a UUID as a string. This uses C initializer notation, 308 * not UUID notation, in order to match the text in the UEFI 309 * specification. 310 */ 311static void 312apei_format_guid(const struct uuid *uuid, char guidstr[static 69]) 313{ 314 315 snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x," 316 "0x%02x%02x," 317 "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}", 318 uuid->time_low, uuid->time_mid, uuid->time_hi_and_version, 319 uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved, 320 uuid->node[0], uuid->node[1], uuid->node[2], 321 uuid->node[3], uuid->node[4], uuid->node[5]); 322} 323 324/* 325 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section 326 */ 327 328static const char *const cper_memory_error_type[] = { 329#define F(LN, SN, V) [LN] = #SN, 330 CPER_MEMORY_ERROR_TYPES(F) 331#undef F 332}; 333 334/* 335 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block 336 * 337 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this 338 * but are designated as being intended for Generic Error Data Entries 339 * rather than Generic Error Status Blocks. 340 */ 341static const char *const apei_gesb_severity[] = { 342 [0] = "recoverable", 343 [1] = "fatal", 344 [2] = "corrected", 345 [3] = "none", 346}; 347 348/* 349 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry 350 */ 351static const char *const apei_gede_severity[] = { 352 [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable", 353 [ACPI_HEST_GEN_ERROR_FATAL] = "fatal", 354 [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected", 355 [ACPI_HEST_GEN_ERROR_NONE] = "none", 356}; 357 358/* 359 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section 360 */ 361static const struct uuid CPER_MEMORY_ERROR_SECTION = 362 {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}}; 363 364static void 365apei_cper_memory_error_report(struct apei_softc *sc, const void *buf, 366 size_t len, const char *ctx) 367{ 368 const struct cper_memory_error *ME = buf; 369 char bitbuf[1024]; 370 371 snprintb(bitbuf, sizeof(bitbuf), 372 CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits); 373 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf); 374 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) { 375 /* 376 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status 377 */ 378 /* XXX define this format somewhere */ 379 snprintb(bitbuf, sizeof(bitbuf), "\177\020" 380 "f\010\010" "ErrorType\0" 381 "=\001" "ERR_INTERNAL\0" 382 "=\004" "ERR_MEM\0" 383 "=\005" "ERR_TLB\0" 384 "=\006" "ERR_CACHE\0" 385 "=\007" "ERR_FUNCTION\0" 386 "=\010" "ERR_SELFTEST\0" 387 "=\011" "ERR_FLOW\0" 388 "=\020" "ERR_BUS\0" 389 "=\021" "ERR_MAP\0" 390 "=\022" "ERR_IMPROPER\0" 391 "=\023" "ERR_UNIMPL\0" 392 "=\024" "ERR_LOL\0" 393 "=\025" "ERR_RESPONSE\0" 394 "=\026" "ERR_PARITY\0" 395 "=\027" "ERR_PROTOCOL\0" 396 "=\030" "ERR_ERROR\0" 397 "=\031" "ERR_TIMEOUT\0" 398 "=\032" "ERR_POISONED\0" 399 "b\020" "AddressError\0" 400 "b\021" "ControlError\0" 401 "b\022" "DataError\0" 402 "b\023" "ResponderDetected\0" 403 "b\024" "RequesterDetected\0" 404 "b\025" "FirstError\0" 405 "b\026" "Overflow\0" 406 "\0", ME->ErrorStatus); 407 device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf); 408 } 409 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) { 410 device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n", 411 ctx, ME->PhysicalAddress); 412 } 413 if (ME->ValidationBits & 414 CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) { 415 device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64 416 "\n", ctx, ME->PhysicalAddressMask); 417 } 418 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) { 419 device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx, 420 ME->Node); 421 } 422 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) { 423 device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx, 424 ME->Card); 425 } 426 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) { 427 device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx, 428 ME->Module); 429 } 430 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) { 431 device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx, 432 ME->Bank); 433 } 434 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) { 435 device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx, 436 ME->Device); 437 } 438 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) { 439 device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx, 440 ME->Row); 441 } 442 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) { 443 device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx, 444 ME->Column); 445 } 446 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) { 447 device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n", 448 ctx, ME->BitPosition); 449 } 450 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) { 451 device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n", 452 ctx, ME->RequestorId); 453 } 454 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) { 455 device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n", 456 ctx, ME->ResponderId); 457 } 458 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) { 459 device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n", 460 ctx, ME->TargetId); 461 } 462 if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) { 463 const uint8_t t = ME->MemoryErrorType; 464 const char *n = t < __arraycount(cper_memory_error_type) 465 ? cper_memory_error_type[t] : NULL; 466 467 if (n) { 468 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d" 469 " (%s)\n", ctx, t, n); 470 } else { 471 device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n", 472 ctx, t); 473 } 474 } 475} 476 477/* 478 * apei_cper_reports 479 * 480 * Table of known Common Platform Error Record types, symbolic 481 * names, minimum data lengths, and functions to report them. 482 * 483 * The section types and corresponding section layouts are listed 484 * at: 485 * 486 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html 487 */ 488static const struct apei_cper_report { 489 const char *name; 490 const struct uuid *type; 491 size_t minlength; 492 void (*func)(struct apei_softc *, const void *, size_t, const char *); 493} apei_cper_reports[] = { 494 { "memory", &CPER_MEMORY_ERROR_SECTION, 495 sizeof(struct cper_memory_error), 496 apei_cper_memory_error_report }, 497}; 498 499/* 500 * apei_gede_report_header(sc, gede, ctx, &headerlen, &report) 501 * 502 * Report the header of the ith Generic Error Data Entry in the 503 * given context. 504 * 505 * Return the actual length of the header in headerlen, or 0 if 506 * not known because the revision isn't recognized. 507 * 508 * Return the report type in report, or NULL if not known because 509 * the section type isn't recognized. 510 */ 511static void 512apei_gede_report_header(struct apei_softc *sc, 513 const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, 514 size_t *headerlenp, const struct apei_cper_report **reportp) 515{ 516 const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede; 517 struct uuid sectype; 518 char guidstr[69]; 519 char buf[128]; 520 unsigned i; 521 522 /* 523 * Print the section type as a C initializer. It would be 524 * prettier to use standard hyphenated UUID notation, but that 525 * notation is slightly ambiguous here (two octets could be 526 * written either way, depending on Microsoft convention -- 527 * which influenced ACPI and UEFI -- or internet convention), 528 * and the UEFI spec writes the C initializer notation, so this 529 * makes it easier to search for. 530 * 531 * Also print out a symbolic name, if we know it. 532 */ 533 apei_cper_guid_dec(gede->SectionType, §ype); 534 apei_format_guid(§ype, guidstr); 535 for (i = 0; i < __arraycount(apei_cper_reports); i++) { 536 const struct apei_cper_report *const report = 537 &apei_cper_reports[i]; 538 539 if (memcmp(§ype, report->type, sizeof(sectype)) != 0) 540 continue; 541 device_printf(sc->sc_dev, "%s: SectionType=%s (%s error)\n", 542 ctx, guidstr, report->name); 543 *reportp = report; 544 break; 545 } 546 if (i == __arraycount(apei_cper_reports)) { 547 device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx, 548 guidstr); 549 *reportp = NULL; 550 } 551 552 /* 553 * Print the numeric severity and, if we have it, a symbolic 554 * name for it. 555 */ 556 device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", ctx, 557 gede->ErrorSeverity, 558 (gede->ErrorSeverity < __arraycount(apei_gede_severity) 559 ? apei_gede_severity[gede->ErrorSeverity] 560 : "unknown")); 561 562 /* 563 * The Revision may not often be useful, but this is only ever 564 * shown at the time of a hardware error report, not something 565 * you can glean at your convenience with acpidump. So print 566 * it anyway. 567 */ 568 device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx, 569 gede->Revision); 570 571 /* 572 * Don't touch anything past the Revision until we've 573 * determined we understand it. Return the header length to 574 * the caller, or return zero -- and stop here -- if we don't 575 * know what the actual header length is. 576 */ 577 if (gede->Revision < 0x0300) { 578 *headerlenp = sizeof(*gede); 579 } else if (gede->Revision < 0x0400) { 580 *headerlenp = sizeof(*gede_v3); 581 } else { 582 *headerlenp = 0; 583 return; 584 } 585 586 /* 587 * Print the validation bits at debug level. Only really 588 * helpful if there are bits we _don't_ know about. 589 */ 590 /* XXX define this format somewhere */ 591 snprintb(buf, sizeof(buf), "\177\020" 592 "b\000" "FRU_ID\0" 593 "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */ 594 "b\002" "TIMESTAMP\0" 595 "\0", gede->ValidationBits); 596 aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, buf); 597 598 /* 599 * Print the CPER section flags. 600 */ 601 snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags); 602 device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf); 603 604 /* 605 * The ErrorDataLength is unlikely to be useful for the log, so 606 * print it at debug level only. 607 */ 608 aprint_debug_dev(sc->sc_dev, "%s: ErrorDataLength=0x%"PRIu32"\n", 609 ctx, gede->ErrorDataLength); 610 611 /* 612 * Print the FRU Id and text, if available. 613 */ 614 if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) { 615 struct uuid fruid; 616 617 apei_cper_guid_dec(gede->FruId, &fruid); 618 apei_format_guid(&fruid, guidstr); 619 device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr); 620 } 621 if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) { 622 device_printf(sc->sc_dev, "%s: FruText=%.20s\n", 623 ctx, gede->FruText); 624 } 625 626 /* 627 * Print the timestamp, if available by the revision number and 628 * the validation bits. 629 */ 630 if (gede->Revision >= 0x0300 && gede->Revision < 0x0400 && 631 gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) { 632 const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp; 633 const uint8_t s = t[0]; 634 const uint8_t m = t[1]; 635 const uint8_t h = t[2]; 636 const uint8_t f = t[3]; 637 const uint8_t D = t[4]; 638 const uint8_t M = t[5]; 639 const uint8_t Y = t[6]; 640 const uint8_t C = t[7]; 641 642 device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64 643 " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n", 644 ctx, gede_v3->TimeStamp, 645 C,Y, M, D, h,m,s, 646 f & __BIT(0) ? " (event time)" : " (collect time)"); 647 } 648} 649 650/* 651 * apei_gesb_report(sc, gesb, size, ctx) 652 * 653 * Check a Generic Error Status Block, of at most the specified 654 * size in bytes, and report any errors in it. Return the 32-bit 655 * Block Status in case the caller needs it to acknowledge the 656 * report to firmware. 657 */ 658uint32_t 659apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb, 660 size_t size, const char *ctx, bool *fatalp) 661{ 662 uint32_t status, unknownstatus, severity, nentries, i; 663 uint32_t datalen, rawdatalen; 664 const ACPI_HEST_GENERIC_DATA *gede0, *gede; 665 const unsigned char *rawdata; 666 char statusbuf[128]; 667 bool fatal = false; 668 669 /* 670 * Verify the buffer is large enough for a Generic Error Status 671 * Block before we try to touch anything in it. 672 */ 673 if (size < sizeof(*gesb)) { 674 device_printf(sc->sc_dev, "%s: truncated GESB, %zu < %zu\n", 675 ctx, size, sizeof(*gesb)); 676 status = 0; 677 goto out; 678 } 679 size -= sizeof(*gesb); 680 681 /* 682 * Load the status. Access ordering rules are unclear in the 683 * ACPI specification; I'm guessing that load-acquire of the 684 * block status is a good idea before any other access to the 685 * GESB. 686 */ 687 status = atomic_load_acquire(&gesb->BlockStatus); 688 689 /* 690 * If there are no status bits set, the rest of the GESB is 691 * garbage, so stop here. 692 */ 693 if (status == 0) { 694 /* XXX dtrace */ 695 /* XXX DPRINTF */ 696 goto out; 697 } 698 699 /* XXX define this format somewhere */ 700 snprintb(statusbuf, sizeof(statusbuf), "\177\020" 701 "b\000" "UE\0" 702 "b\001" "CE\0" 703 "b\002" "MULTI_UE\0" 704 "b\003" "MULTI_CE\0" 705 "f\004\010" "GEDE_COUNT\0" 706 "\0", status); 707 708 /* 709 * Print a message to the console and dmesg about the severity 710 * of the error. 711 */ 712 severity = gesb->ErrorSeverity; 713 nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT); 714 if (severity < __arraycount(apei_gesb_severity)) { 715 device_printf(sc->sc_dev, "%s reported hardware error:" 716 " severity=%s nentries=%u status=%s\n", 717 ctx, apei_gesb_severity[severity], nentries, statusbuf); 718 } else { 719 device_printf(sc->sc_dev, "%s reported error:" 720 " severity=%"PRIu32" nentries=%u status=%s\n", 721 ctx, severity, nentries, statusbuf); 722 } 723 724 /* 725 * Make a determination about whether the error is fatal. 726 * 727 * XXX Currently we don't have any mechanism to recover from 728 * uncorrectable but recoverable errors, so we treat those -- 729 * and anything else we don't recognize -- as fatal. 730 */ 731 switch (severity) { 732 case ACPI_HEST_GEN_ERROR_CORRECTED: 733 case ACPI_HEST_GEN_ERROR_NONE: 734 fatal = false; 735 break; 736 case ACPI_HEST_GEN_ERROR_FATAL: 737 case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */ 738 default: 739 fatal = true; 740 break; 741 } 742 743 /* 744 * Clear the bits we know about to warn if there's anything 745 * left we don't understand. 746 */ 747 unknownstatus = status; 748 unknownstatus &= ~ACPI_HEST_UNCORRECTABLE; 749 unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE; 750 unknownstatus &= ~ACPI_HEST_CORRECTABLE; 751 unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE; 752 unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT; 753 if (unknownstatus != 0) { 754 /* XXX dtrace */ 755 /* XXX rate-limit? */ 756 device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:" 757 " 0x%"PRIx32"\n", ctx, unknownstatus); 758 } 759 760 /* 761 * Advance past the Generic Error Status Block (GESB) header to 762 * the Generic Error Data Entries (GEDEs). 763 */ 764 gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1); 765 766 /* 767 * Verify that the data length (GEDEs) fits within the size. 768 * If not, truncate the GEDEs. 769 */ 770 datalen = gesb->DataLength; 771 if (size < datalen) { 772 device_printf(sc->sc_dev, "%s:" 773 " GESB DataLength exceeds bounds: %zu < %"PRIu32"\n", 774 ctx, size, datalen); 775 datalen = size; 776 } 777 size -= datalen; 778 779 /* 780 * Report each of the Generic Error Data Entries. 781 */ 782 for (i = 0; i < nentries; i++) { 783 size_t headerlen; 784 const struct apei_cper_report *report; 785 char subctx[128]; 786 787 /* 788 * Format a subcontext to show this numbered entry of 789 * the GESB. 790 */ 791 snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i); 792 793 /* 794 * If the remaining GESB data length isn't enough for a 795 * GEDE header, stop here. 796 */ 797 if (datalen < sizeof(*gede)) { 798 device_printf(sc->sc_dev, "%s:" 799 " truncated GEDE: %"PRIu32" < %zu bytes\n", 800 subctx, datalen, sizeof(*gede)); 801 break; 802 } 803 804 /* 805 * Print the GEDE header and get the full length (may 806 * vary from revision to revision of the GEDE) and the 807 * CPER report function if possible. 808 */ 809 apei_gede_report_header(sc, gede, subctx, 810 &headerlen, &report); 811 812 /* 813 * If we don't know the header length because of an 814 * unfamiliar revision, stop here. 815 */ 816 if (headerlen == 0) { 817 device_printf(sc->sc_dev, "%s:" 818 " unknown revision: 0x%"PRIx16"\n", 819 subctx, gede->Revision); 820 break; 821 } 822 823 /* 824 * Stop here if what we mapped is too small for the 825 * error data length. 826 */ 827 datalen -= headerlen; 828 if (datalen < gede->ErrorDataLength) { 829 device_printf(sc->sc_dev, "%s: truncated GEDE payload:" 830 " %"PRIu32" < %"PRIu32" bytes\n", 831 subctx, datalen, gede->ErrorDataLength); 832 break; 833 } 834 835 /* 836 * Report the Common Platform Error Record appendix to 837 * this Generic Error Data Entry. 838 */ 839 if (report == NULL) { 840 device_printf(sc->sc_dev, "%s: [unknown type]\n", ctx); 841 } else { 842 (*report->func)(sc, (const char *)gede + headerlen, 843 gede->ErrorDataLength, subctx); 844 } 845 846 /* 847 * Advance past the GEDE header and CPER data to the 848 * next GEDE. 849 */ 850 gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede + 851 + headerlen + gede->ErrorDataLength); 852 } 853 854 /* 855 * Advance past the Generic Error Data Entries (GEDEs) to the 856 * raw error data. 857 * 858 * XXX Provide Max Raw Data Length as a parameter, as found in 859 * various HEST entry types. 860 */ 861 rawdata = (const unsigned char *)gede0 + datalen; 862 863 /* 864 * Verify that the raw data length fits within the size. If 865 * not, truncate the raw data. 866 */ 867 rawdatalen = gesb->RawDataLength; 868 if (size < rawdatalen) { 869 device_printf(sc->sc_dev, "%s:" 870 " GESB RawDataLength exceeds bounds: %zu < %"PRIu32"\n", 871 ctx, size, rawdatalen); 872 rawdatalen = size; 873 } 874 size -= rawdatalen; 875 876 /* 877 * Hexdump the raw data, if any. 878 */ 879 if (rawdatalen > 0) { 880 char devctx[128]; 881 882 snprintf(devctx, sizeof(devctx), "%s: %s: raw data", 883 device_xname(sc->sc_dev), ctx); 884 hexdump(printf, devctx, rawdata, rawdatalen); 885 } 886 887 /* 888 * If there's anything left after the raw data, warn. 889 */ 890 if (size > 0) { 891 device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n", 892 ctx, size); 893 } 894 895 /* 896 * Return the status so the caller can ack it, and tell the 897 * caller whether this error is fatal. 898 */ 899out: *fatalp = fatal; 900 return status; 901} 902 903MODULE(MODULE_CLASS_DRIVER, apei, NULL); 904 905#ifdef _MODULE 906#include "ioconf.c" 907#endif 908 909static int 910apei_modcmd(modcmd_t cmd, void *opaque) 911{ 912 int error = 0; 913 914 switch (cmd) { 915 case MODULE_CMD_INIT: 916#ifdef _MODULE 917 error = config_init_component(cfdriver_ioconf_apei, 918 cfattach_ioconf_apei, cfdata_ioconf_apei); 919#endif 920 return error; 921 case MODULE_CMD_FINI: 922#ifdef _MODULE 923 error = config_fini_component(cfdriver_ioconf_apei, 924 cfattach_ioconf_apei, cfdata_ioconf_apei); 925#endif 926 return error; 927 default: 928 return ENOTTY; 929 } 930} 931