1/*	$NetBSD: apei.c,v 1.3 2024/03/26 22:01:03 rillig Exp $	*/
2
3/*-
4 * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * APEI: ACPI Platform Error Interface
31 *
32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html
33 *
34 * XXX dtrace probes
35 *
36 * XXX call _OSC appropriately to announce to the platform that we, the
37 * OSPM, support APEI
38 */
39
40#include <sys/cdefs.h>
41__KERNEL_RCSID(0, "$NetBSD: apei.c,v 1.3 2024/03/26 22:01:03 rillig Exp $");
42
43#include <sys/param.h>
44#include <sys/types.h>
45
46#include <sys/atomic.h>
47#include <sys/device.h>
48#include <sys/module.h>
49#include <sys/sysctl.h>
50#include <sys/uuid.h>
51
52#include <dev/acpi/acpireg.h>
53#include <dev/acpi/acpivar.h>
54#include <dev/acpi/apei_bertvar.h>
55#include <dev/acpi/apei_cper.h>
56#include <dev/acpi/apei_einjvar.h>
57#include <dev/acpi/apei_erstvar.h>
58#include <dev/acpi/apei_hestvar.h>
59#include <dev/acpi/apei_interp.h>
60#include <dev/acpi/apeivar.h>
61
/*
 * Component and module name for this file.
 * NOTE(review): presumably consumed by ACPICA's debug/trace macros --
 * confirm against the ACPICA headers.
 */
#define	_COMPONENT	ACPI_RESOURCE_COMPONENT
ACPI_MODULE_NAME	("apei")

/*
 * Autoconfiguration entry points; handed to CFATTACH_DECL_NEW below.
 */
static int apei_match(device_t, cfdata_t, void *);
static void apei_attach(device_t, device_t, void *);
static int apei_detach(device_t, int);

/*
 * Acquire and release references to the BERT, EINJ, ERST, and HEST
 * tables.
 */
static void apei_get_tables(struct apei_tab *);
static void apei_put_tables(struct apei_tab *);

/*
 * Print identifying fields of an ACPI table header at attach time.
 */
static void apei_identify(struct apei_softc *, const char *,
    const ACPI_TABLE_HEADER *);

/*
 * Attachment glue for the apei driver: the softc is a struct
 * apei_softc, and the match/attach/detach routines are the functions
 * defined in this file.  The final argument is left NULL.
 */
CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc),
    apei_match, apei_attach, apei_detach, NULL);
77
78static int
79apei_match(device_t parent, cfdata_t match, void *aux)
80{
81	struct apei_tab tab;
82	int prio = 0;
83
84	/*
85	 * If we have any of the APEI tables, match.
86	 */
87	apei_get_tables(&tab);
88	if (tab.bert || tab.einj || tab.erst || tab.hest)
89		prio = 1;
90	apei_put_tables(&tab);
91
92	return prio;
93}
94
95static void
96apei_attach(device_t parent, device_t self, void *aux)
97{
98	struct apei_softc *sc = device_private(self);
99	const struct sysctlnode *sysctl_hw_acpi;
100	int error;
101
102	aprint_naive("\n");
103	aprint_normal(": ACPI Platform Error Interface\n");
104
105	pmf_device_register(self, NULL, NULL);
106
107	sc->sc_dev = self;
108	apei_get_tables(&sc->sc_tab);
109
110	/*
111	 * Get the sysctl hw.acpi node.  This should already be created
112	 * but I don't see an easy way to get at it.  If this fails,
113	 * something is seriously wrong, so let's stop here.
114	 */
115	error = sysctl_createv(&sc->sc_sysctllog, 0,
116	    NULL, &sysctl_hw_acpi, 0,
117	    CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0,
118	    CTL_HW, CTL_CREATE, CTL_EOL);
119	if (error) {
120		aprint_error_dev(sc->sc_dev,
121		    "failed to create sysctl hw.acpi: %d\n", error);
122		return;
123	}
124
125	/*
126	 * Create sysctl hw.acpi.apei.
127	 */
128	error = sysctl_createv(&sc->sc_sysctllog, 0,
129	    &sysctl_hw_acpi, &sc->sc_sysctlroot, 0,
130	    CTLTYPE_NODE, "apei",
131	    SYSCTL_DESCR("ACPI Platform Error Interface"),
132	    NULL, 0, NULL, 0,
133	    CTL_CREATE, CTL_EOL);
134	if (error) {
135		aprint_error_dev(sc->sc_dev,
136		    "failed to create sysctl hw.acpi.apei: %d\n", error);
137		return;
138	}
139
140	/*
141	 * Set up BERT, EINJ, ERST, and HEST.
142	 */
143	if (sc->sc_tab.bert) {
144		apei_identify(sc, "BERT", &sc->sc_tab.bert->Header);
145		apei_bert_attach(sc);
146	}
147	if (sc->sc_tab.einj) {
148		apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header);
149		apei_einj_attach(sc);
150	}
151	if (sc->sc_tab.erst) {
152		apei_identify(sc, "ERST", &sc->sc_tab.erst->Header);
153		apei_erst_attach(sc);
154	}
155	if (sc->sc_tab.hest) {
156		apei_identify(sc, "HEST", &sc->sc_tab.hest->Header);
157		apei_hest_attach(sc);
158	}
159}
160
161static int
162apei_detach(device_t self, int flags)
163{
164	struct apei_softc *sc = device_private(self);
165	int error;
166
167	/*
168	 * Detach children.  We don't currently have any but this is
169	 * harmless without children and mandatory if we ever sprouted
170	 * them, so let's just leave it here for good measure.
171	 *
172	 * After this point, we are committed to detaching; failure is
173	 * forbidden.
174	 */
175	error = config_detach_children(self, flags);
176	if (error)
177		return error;
178
179	/*
180	 * Tear down all the sysctl nodes first, before the software
181	 * state backing them goes away.
182	 */
183	sysctl_teardown(&sc->sc_sysctllog);
184	sc->sc_sysctlroot = NULL;
185
186	/*
187	 * Detach the software state for the APEI tables.
188	 */
189	if (sc->sc_tab.hest)
190		apei_hest_detach(sc);
191	if (sc->sc_tab.erst)
192		apei_erst_detach(sc);
193	if (sc->sc_tab.einj)
194		apei_einj_detach(sc);
195	if (sc->sc_tab.bert)
196		apei_bert_detach(sc);
197
198	/*
199	 * Release the APEI tables and we're done.
200	 */
201	apei_put_tables(&sc->sc_tab);
202	pmf_device_deregister(self);
203	return 0;
204}
205
206/*
207 * apei_get_tables(tab)
208 *
209 *	Get references to whichever APEI-related tables -- BERT, EINJ,
210 *	ERST, HEST -- are available in the system.
211 */
212static void
213apei_get_tables(struct apei_tab *tab)
214{
215	ACPI_STATUS rv;
216
217	/*
218	 * Probe the BERT -- Boot Error Record Table.
219	 */
220	rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert);
221	if (ACPI_FAILURE(rv))
222		tab->bert = NULL;
223
224	/*
225	 * Probe the EINJ -- Error Injection Table.
226	 */
227	rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj);
228	if (ACPI_FAILURE(rv))
229		tab->einj = NULL;
230
231	/*
232	 * Probe the ERST -- Error Record Serialization Table.
233	 */
234	rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst);
235	if (ACPI_FAILURE(rv))
236		tab->erst = NULL;
237
238	/*
239	 * Probe the HEST -- Hardware Error Source Table.
240	 */
241	rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest);
242	if (ACPI_FAILURE(rv))
243		tab->hest = NULL;
244}
245
246/*
247 * apei_put_tables(tab)
248 *
249 *	Release the tables acquired by apei_get_tables.
250 */
251static void
252apei_put_tables(struct apei_tab *tab)
253{
254
255	if (tab->bert != NULL) {
256		AcpiPutTable(&tab->bert->Header);
257		tab->bert = NULL;
258	}
259	if (tab->einj != NULL) {
260		AcpiPutTable(&tab->einj->Header);
261		tab->einj = NULL;
262	}
263	if (tab->erst != NULL) {
264		AcpiPutTable(&tab->erst->Header);
265		tab->erst = NULL;
266	}
267	if (tab->hest != NULL) {
268		AcpiPutTable(&tab->hest->Header);
269		tab->hest = NULL;
270	}
271}
272
273/*
274 * apei_identify(sc, name, header)
275 *
276 *	Identify the APEI-related table header for dmesg.
277 */
278static void
279apei_identify(struct apei_softc *sc, const char *name,
280    const ACPI_TABLE_HEADER *h)
281{
282
283	aprint_normal_dev(sc->sc_dev, "%s:"
284	    " OemId <%6.6s,%8.8s,%08x>"
285	    " AslId <%4.4s,%08x>\n",
286	    name,
287	    h->OemId, h->OemTableId, h->OemRevision,
288	    h->AslCompilerId, h->AslCompilerRevision);
289}
290
/*
 * apei_cper_guid_dec(buf, uuid)
 *
 *	Convert the 16-byte Common Platform Error Record GUID stored
 *	at buf, as found in an ACPI table, into the struct uuid at
 *	uuid.  The conversion itself is delegated to uuid_dec_le.
 *
 *	buf must point to at least 16 readable bytes.
 */
static void
apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid)
{

	uuid_dec_le(buf, uuid);
}
303
304/*
305 * apei_format_guid(uuid, s)
306 *
307 *	Format a UUID as a string.  This uses C initializer notation,
308 *	not UUID notation, in order to match the text in the UEFI
309 *	specification.
310 */
311static void
312apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
313{
314
315	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
316	    "0x%02x%02x,"
317	    "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
318	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
319	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved,
320	    uuid->node[0], uuid->node[1], uuid->node[2],
321	    uuid->node[3], uuid->node[4], uuid->node[5]);
322}
323
/*
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * cper_memory_error_type[]
 *
 *	Names for the MemoryErrorType field of a memory error section,
 *	indexed by the field's value.  The table is generated from the
 *	CPER_MEMORY_ERROR_TYPES list (defined outside this file --
 *	presumably in apei_cper.h): each F(LN, SN, V) entry expands to
 *	the designated initializer [LN] = "SN"; V is not used here.
 *
 *	Any index below the array length that the list does not name
 *	is left NULL, so users must check for NULL as well as for the
 *	array bound.
 */

static const char *const cper_memory_error_type[] = {
#define	F(LN, SN, V)	[LN] = #SN,
	CPER_MEMORY_ERROR_TYPES(F)
#undef	F
};
333
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
 *
 * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this
 * but are designated as being intended for Generic Error Data Entries
 * rather than Generic Error Status Blocks.
 *
 * apei_gesb_severity[]
 *
 *	Names for the ErrorSeverity field of a Generic Error Status
 *	Block, indexed by the field's value.  Values outside the
 *	array have no name; apei_gesb_report bounds-checks before
 *	indexing and prints the number instead.
 */
static const char *const apei_gesb_severity[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "none",
};
347
/*
 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
 *
 * apei_gede_severity[]
 *
 *	Names for the ErrorSeverity field of a Generic Error Data
 *	Entry, indexed by the acpica ACPI_HEST_GEN_ERROR_* constants.
 *	apei_gede_report_header bounds-checks before indexing and
 *	prints "unknown" for values outside the array.
 */
static const char *const apei_gede_severity[] = {
	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
	[ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
	[ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
	[ACPI_HEST_GEN_ERROR_NONE] = "none",
};
357
/*
 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
 *
 * Section type GUID that identifies a memory error section.  It is
 * referenced from the apei_cper_reports table, where it selects
 * apei_cper_memory_error_report as the reporting function.
 */
static const struct uuid CPER_MEMORY_ERROR_SECTION =
    {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
363
364static void
365apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
366    size_t len, const char *ctx)
367{
368	const struct cper_memory_error *ME = buf;
369	char bitbuf[1024];
370
371	snprintb(bitbuf, sizeof(bitbuf),
372	    CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
373	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
374	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
375		/*
376		 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
377		 */
378		/* XXX define this format somewhere */
379		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
380		    "f\010\010"	"ErrorType\0"
381			"=\001"		"ERR_INTERNAL\0"
382			"=\004"		"ERR_MEM\0"
383			"=\005"		"ERR_TLB\0"
384			"=\006"		"ERR_CACHE\0"
385			"=\007"		"ERR_FUNCTION\0"
386			"=\010"		"ERR_SELFTEST\0"
387			"=\011"		"ERR_FLOW\0"
388			"=\020"		"ERR_BUS\0"
389			"=\021"		"ERR_MAP\0"
390			"=\022"		"ERR_IMPROPER\0"
391			"=\023"		"ERR_UNIMPL\0"
392			"=\024"		"ERR_LOL\0"
393			"=\025"		"ERR_RESPONSE\0"
394			"=\026"		"ERR_PARITY\0"
395			"=\027"		"ERR_PROTOCOL\0"
396			"=\030"		"ERR_ERROR\0"
397			"=\031"		"ERR_TIMEOUT\0"
398			"=\032"		"ERR_POISONED\0"
399		    "b\020"	"AddressError\0"
400		    "b\021"	"ControlError\0"
401		    "b\022"	"DataError\0"
402		    "b\023"	"ResponderDetected\0"
403		    "b\024"	"RequesterDetected\0"
404		    "b\025"	"FirstError\0"
405		    "b\026"	"Overflow\0"
406		    "\0", ME->ErrorStatus);
407		device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
408	}
409	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
410		device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
411		    ctx, ME->PhysicalAddress);
412	}
413	if (ME->ValidationBits &
414	    CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
415		device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
416		    "\n", ctx, ME->PhysicalAddressMask);
417	}
418	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
419		device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
420		    ME->Node);
421	}
422	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
423		device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
424		    ME->Card);
425	}
426	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
427		device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
428		    ME->Module);
429	}
430	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
431		device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
432		    ME->Bank);
433	}
434	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
435		device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
436		    ME->Device);
437	}
438	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
439		device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
440		    ME->Row);
441	}
442	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
443		device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
444		    ME->Column);
445	}
446	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
447		device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
448		    ctx, ME->BitPosition);
449	}
450	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
451		device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
452		    ctx, ME->RequestorId);
453	}
454	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
455		device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
456		    ctx, ME->ResponderId);
457	}
458	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
459		device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
460		    ctx, ME->TargetId);
461	}
462	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
463		const uint8_t t = ME->MemoryErrorType;
464		const char *n = t < __arraycount(cper_memory_error_type)
465		    ? cper_memory_error_type[t] : NULL;
466
467		if (n) {
468			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
469			    " (%s)\n", ctx, t, n);
470		} else {
471			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
472			    ctx, t);
473		}
474	}
475}
476
/*
 * apei_cper_reports
 *
 *	Table of known Common Platform Error Record types, symbolic
 *	names, minimum data lengths, and functions to report them.
 *
 *	The section types and corresponding section layouts are listed
 *	at:
 *
 *	https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
 *
 *	apei_gede_report_header searches this table by comparing a
 *	decoded section type GUID against each entry's type, and
 *	apei_gesb_report calls func on the matching entry with the
 *	section payload, its length, and a context string.
 */
static const struct apei_cper_report {
	/* short name printed next to the section type, e.g. "memory" */
	const char *name;
	/* section type GUID that selects this entry */
	const struct uuid *type;
	/* smallest payload, in bytes, that func can interpret */
	size_t minlength;
	/* reporting function: (sc, payload, payload length, context) */
	void (*func)(struct apei_softc *, const void *, size_t, const char *);
} apei_cper_reports[] = {
	{ "memory", &CPER_MEMORY_ERROR_SECTION,
	  sizeof(struct cper_memory_error),
	  apei_cper_memory_error_report },
};
498
499/*
500 * apei_gede_report_header(sc, gede, ctx, &headerlen, &report)
501 *
502 *	Report the header of the ith Generic Error Data Entry in the
503 *	given context.
504 *
505 *	Return the actual length of the header in headerlen, or 0 if
506 *	not known because the revision isn't recognized.
507 *
508 *	Return the report type in report, or NULL if not known because
509 *	the section type isn't recognized.
510 */
511static void
512apei_gede_report_header(struct apei_softc *sc,
513    const ACPI_HEST_GENERIC_DATA *gede, const char *ctx,
514    size_t *headerlenp, const struct apei_cper_report **reportp)
515{
516	const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
517	struct uuid sectype;
518	char guidstr[69];
519	char buf[128];
520	unsigned i;
521
522	/*
523	 * Print the section type as a C initializer.  It would be
524	 * prettier to use standard hyphenated UUID notation, but that
525	 * notation is slightly ambiguous here (two octets could be
526	 * written either way, depending on Microsoft convention --
527	 * which influenced ACPI and UEFI -- or internet convention),
528	 * and the UEFI spec writes the C initializer notation, so this
529	 * makes it easier to search for.
530	 *
531	 * Also print out a symbolic name, if we know it.
532	 */
533	apei_cper_guid_dec(gede->SectionType, &sectype);
534	apei_format_guid(&sectype, guidstr);
535	for (i = 0; i < __arraycount(apei_cper_reports); i++) {
536		const struct apei_cper_report *const report =
537		    &apei_cper_reports[i];
538
539		if (memcmp(&sectype, report->type, sizeof(sectype)) != 0)
540			continue;
541		device_printf(sc->sc_dev, "%s: SectionType=%s (%s error)\n",
542		    ctx, guidstr, report->name);
543		*reportp = report;
544		break;
545	}
546	if (i == __arraycount(apei_cper_reports)) {
547		device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
548		    guidstr);
549		*reportp = NULL;
550	}
551
552	/*
553	 * Print the numeric severity and, if we have it, a symbolic
554	 * name for it.
555	 */
556	device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", ctx,
557	    gede->ErrorSeverity,
558	    (gede->ErrorSeverity < __arraycount(apei_gede_severity)
559		? apei_gede_severity[gede->ErrorSeverity]
560		: "unknown"));
561
562	/*
563	 * The Revision may not often be useful, but this is only ever
564	 * shown at the time of a hardware error report, not something
565	 * you can glean at your convenience with acpidump.  So print
566	 * it anyway.
567	 */
568	device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
569	    gede->Revision);
570
571	/*
572	 * Don't touch anything past the Revision until we've
573	 * determined we understand it.  Return the header length to
574	 * the caller, or return zero -- and stop here -- if we don't
575	 * know what the actual header length is.
576	 */
577	if (gede->Revision < 0x0300) {
578		*headerlenp = sizeof(*gede);
579	} else if (gede->Revision < 0x0400) {
580		*headerlenp = sizeof(*gede_v3);
581	} else {
582		*headerlenp = 0;
583		return;
584	}
585
586	/*
587	 * Print the validation bits at debug level.  Only really
588	 * helpful if there are bits we _don't_ know about.
589	 */
590	/* XXX define this format somewhere */
591	snprintb(buf, sizeof(buf), "\177\020"
592	    "b\000"	"FRU_ID\0"
593	    "b\001"	"FRU_TEXT\0" /* `FRU string', sometimes */
594	    "b\002"	"TIMESTAMP\0"
595	    "\0", gede->ValidationBits);
596	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, buf);
597
598	/*
599	 * Print the CPER section flags.
600	 */
601	snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags);
602	device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
603
604	/*
605	 * The ErrorDataLength is unlikely to be useful for the log, so
606	 * print it at debug level only.
607	 */
608	aprint_debug_dev(sc->sc_dev, "%s: ErrorDataLength=0x%"PRIu32"\n",
609	    ctx, gede->ErrorDataLength);
610
611	/*
612	 * Print the FRU Id and text, if available.
613	 */
614	if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) {
615		struct uuid fruid;
616
617		apei_cper_guid_dec(gede->FruId, &fruid);
618		apei_format_guid(&fruid, guidstr);
619		device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
620	}
621	if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) {
622		device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
623		    ctx, gede->FruText);
624	}
625
626	/*
627	 * Print the timestamp, if available by the revision number and
628	 * the validation bits.
629	 */
630	if (gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
631	    gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
632		const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
633		const uint8_t s = t[0];
634		const uint8_t m = t[1];
635		const uint8_t h = t[2];
636		const uint8_t f = t[3];
637		const uint8_t D = t[4];
638		const uint8_t M = t[5];
639		const uint8_t Y = t[6];
640		const uint8_t C = t[7];
641
642		device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
643		    " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n",
644		    ctx, gede_v3->TimeStamp,
645		    C,Y, M, D, h,m,s,
646		    f & __BIT(0) ? " (event time)" : " (collect time)");
647	}
648}
649
650/*
651 * apei_gesb_report(sc, gesb, size, ctx)
652 *
653 *	Check a Generic Error Status Block, of at most the specified
654 *	size in bytes, and report any errors in it.  Return the 32-bit
655 *	Block Status in case the caller needs it to acknowledge the
656 *	report to firmware.
657 */
658uint32_t
659apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
660    size_t size, const char *ctx, bool *fatalp)
661{
662	uint32_t status, unknownstatus, severity, nentries, i;
663	uint32_t datalen, rawdatalen;
664	const ACPI_HEST_GENERIC_DATA *gede0, *gede;
665	const unsigned char *rawdata;
666	char statusbuf[128];
667	bool fatal = false;
668
669	/*
670	 * Verify the buffer is large enough for a Generic Error Status
671	 * Block before we try to touch anything in it.
672	 */
673	if (size < sizeof(*gesb)) {
674		device_printf(sc->sc_dev, "%s: truncated GESB, %zu < %zu\n",
675		    ctx, size, sizeof(*gesb));
676		status = 0;
677		goto out;
678	}
679	size -= sizeof(*gesb);
680
681	/*
682	 * Load the status.  Access ordering rules are unclear in the
683	 * ACPI specification; I'm guessing that load-acquire of the
684	 * block status is a good idea before any other access to the
685	 * GESB.
686	 */
687	status = atomic_load_acquire(&gesb->BlockStatus);
688
689	/*
690	 * If there are no status bits set, the rest of the GESB is
691	 * garbage, so stop here.
692	 */
693	if (status == 0) {
694		/* XXX dtrace */
695		/* XXX DPRINTF */
696		goto out;
697	}
698
699	/* XXX define this format somewhere */
700	snprintb(statusbuf, sizeof(statusbuf), "\177\020"
701	    "b\000"	"UE\0"
702	    "b\001"	"CE\0"
703	    "b\002"	"MULTI_UE\0"
704	    "b\003"	"MULTI_CE\0"
705	    "f\004\010"	"GEDE_COUNT\0"
706	    "\0", status);
707
708	/*
709	 * Print a message to the console and dmesg about the severity
710	 * of the error.
711	 */
712	severity = gesb->ErrorSeverity;
713	nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
714	if (severity < __arraycount(apei_gesb_severity)) {
715		device_printf(sc->sc_dev, "%s reported hardware error:"
716		    " severity=%s nentries=%u status=%s\n",
717		    ctx, apei_gesb_severity[severity], nentries, statusbuf);
718	} else {
719		device_printf(sc->sc_dev, "%s reported error:"
720		    " severity=%"PRIu32" nentries=%u status=%s\n",
721		    ctx, severity, nentries, statusbuf);
722	}
723
724	/*
725	 * Make a determination about whether the error is fatal.
726	 *
727	 * XXX Currently we don't have any mechanism to recover from
728	 * uncorrectable but recoverable errors, so we treat those --
729	 * and anything else we don't recognize -- as fatal.
730	 */
731	switch (severity) {
732	case ACPI_HEST_GEN_ERROR_CORRECTED:
733	case ACPI_HEST_GEN_ERROR_NONE:
734		fatal = false;
735		break;
736	case ACPI_HEST_GEN_ERROR_FATAL:
737	case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */
738	default:
739		fatal = true;
740		break;
741	}
742
743	/*
744	 * Clear the bits we know about to warn if there's anything
745	 * left we don't understand.
746	 */
747	unknownstatus = status;
748	unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
749	unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
750	unknownstatus &= ~ACPI_HEST_CORRECTABLE;
751	unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
752	unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
753	if (unknownstatus != 0) {
754		/* XXX dtrace */
755		/* XXX rate-limit? */
756		device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
757		    " 0x%"PRIx32"\n", ctx, unknownstatus);
758	}
759
760	/*
761	 * Advance past the Generic Error Status Block (GESB) header to
762	 * the Generic Error Data Entries (GEDEs).
763	 */
764	gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
765
766	/*
767	 * Verify that the data length (GEDEs) fits within the size.
768	 * If not, truncate the GEDEs.
769	 */
770	datalen = gesb->DataLength;
771	if (size < datalen) {
772		device_printf(sc->sc_dev, "%s:"
773		    " GESB DataLength exceeds bounds: %zu < %"PRIu32"\n",
774		    ctx, size, datalen);
775		datalen = size;
776	}
777	size -= datalen;
778
779	/*
780	 * Report each of the Generic Error Data Entries.
781	 */
782	for (i = 0; i < nentries; i++) {
783		size_t headerlen;
784		const struct apei_cper_report *report;
785		char subctx[128];
786
787		/*
788		 * Format a subcontext to show this numbered entry of
789		 * the GESB.
790		 */
791		snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
792
793		/*
794		 * If the remaining GESB data length isn't enough for a
795		 * GEDE header, stop here.
796		 */
797		if (datalen < sizeof(*gede)) {
798			device_printf(sc->sc_dev, "%s:"
799			    " truncated GEDE: %"PRIu32" < %zu bytes\n",
800			    subctx, datalen, sizeof(*gede));
801			break;
802		}
803
804		/*
805		 * Print the GEDE header and get the full length (may
806		 * vary from revision to revision of the GEDE) and the
807		 * CPER report function if possible.
808		 */
809		apei_gede_report_header(sc, gede, subctx,
810		    &headerlen, &report);
811
812		/*
813		 * If we don't know the header length because of an
814		 * unfamiliar revision, stop here.
815		 */
816		if (headerlen == 0) {
817			device_printf(sc->sc_dev, "%s:"
818			    " unknown revision: 0x%"PRIx16"\n",
819			    subctx, gede->Revision);
820			break;
821		}
822
823		/*
824		 * Stop here if what we mapped is too small for the
825		 * error data length.
826		 */
827		datalen -= headerlen;
828		if (datalen < gede->ErrorDataLength) {
829			device_printf(sc->sc_dev, "%s: truncated GEDE payload:"
830			    " %"PRIu32" < %"PRIu32" bytes\n",
831			    subctx, datalen, gede->ErrorDataLength);
832			break;
833		}
834
835		/*
836		 * Report the Common Platform Error Record appendix to
837		 * this Generic Error Data Entry.
838		 */
839		if (report == NULL) {
840			device_printf(sc->sc_dev, "%s: [unknown type]\n", ctx);
841		} else {
842			(*report->func)(sc, (const char *)gede + headerlen,
843			    gede->ErrorDataLength, subctx);
844		}
845
846		/*
847		 * Advance past the GEDE header and CPER data to the
848		 * next GEDE.
849		 */
850		gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
851		    + headerlen + gede->ErrorDataLength);
852	}
853
854	/*
855	 * Advance past the Generic Error Data Entries (GEDEs) to the
856	 * raw error data.
857	 *
858	 * XXX Provide Max Raw Data Length as a parameter, as found in
859	 * various HEST entry types.
860	 */
861	rawdata = (const unsigned char *)gede0 + datalen;
862
863	/*
864	 * Verify that the raw data length fits within the size.  If
865	 * not, truncate the raw data.
866	 */
867	rawdatalen = gesb->RawDataLength;
868	if (size < rawdatalen) {
869		device_printf(sc->sc_dev, "%s:"
870		    " GESB RawDataLength exceeds bounds: %zu < %"PRIu32"\n",
871		    ctx, size, rawdatalen);
872		rawdatalen = size;
873	}
874	size -= rawdatalen;
875
876	/*
877	 * Hexdump the raw data, if any.
878	 */
879	if (rawdatalen > 0) {
880		char devctx[128];
881
882		snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
883		    device_xname(sc->sc_dev), ctx);
884		hexdump(printf, devctx, rawdata, rawdatalen);
885	}
886
887	/*
888	 * If there's anything left after the raw data, warn.
889	 */
890	if (size > 0) {
891		device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
892		    ctx, size);
893	}
894
895	/*
896	 * Return the status so the caller can ack it, and tell the
897	 * caller whether this error is fatal.
898	 */
899out:	*fatalp = fatal;
900	return status;
901}
902
/*
 * Declare the apei kernel module: class MODULE_CLASS_DRIVER, name
 * apei, and NULL for the final argument.
 */
MODULE(MODULE_CLASS_DRIVER, apei, NULL);

/*
 * Only when built as a loadable module: pull in ioconf.c, which
 * provides the cfdriver_ioconf_apei, cfattach_ioconf_apei, and
 * cfdata_ioconf_apei tables used by apei_modcmd.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
908
909static int
910apei_modcmd(modcmd_t cmd, void *opaque)
911{
912	int error = 0;
913
914	switch (cmd) {
915	case MODULE_CMD_INIT:
916#ifdef _MODULE
917		error = config_init_component(cfdriver_ioconf_apei,
918		    cfattach_ioconf_apei, cfdata_ioconf_apei);
919#endif
920		return error;
921	case MODULE_CMD_FINI:
922#ifdef _MODULE
923		error = config_fini_component(cfdriver_ioconf_apei,
924		    cfattach_ioconf_apei, cfdata_ioconf_apei);
925#endif
926		return error;
927	default:
928		return ENOTTY;
929	}
930}
931