1/*	$NetBSD: apei_hest.c,v 1.3 2024/03/21 02:35:09 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 2024 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * APEI HEST -- Hardware Error Source Table
31 *
32 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#acpi-error-source
33 *
34 * XXX uncorrectable error NMI comes in on all CPUs at once, what to do?
35 *
36 * XXX AMD MCA
37 *
38 * XXX IA32 machine check stuff
39 *
40 * XXX switch-to-polling for GHES notifications
41 *
42 * XXX error threshold for GHES notifications
43 *
44 * XXX sort out interrupt notification types, e.g. do we ever need to
45 * do acpi_intr_establish?
46 *
47 * XXX sysctl knob to force polling each particular error source that
48 * supports it
49 *
50 * XXX consider a lighter-weight polling schedule for machines with
51 * thousands of polled GHESes
52 */
53
54#include <sys/cdefs.h>
55__KERNEL_RCSID(0, "$NetBSD: apei_hest.c,v 1.3 2024/03/21 02:35:09 riastradh Exp $");
56
57#include <sys/types.h>
58
59#include <sys/atomic.h>
60#include <sys/kmem.h>
61#include <sys/lock.h>
62#include <sys/systm.h>
63
64#include <dev/acpi/acpivar.h>
65#include <dev/acpi/apei_cper.h>
66#include <dev/acpi/apei_hestvar.h>
67#include <dev/acpi/apei_hed.h>
68#include <dev/acpi/apei_mapreg.h>
69#include <dev/acpi/apeivar.h>
70
71#if defined(__i386__) || defined(__x86_64__)
72#include <x86/nmi.h>
73#endif
74
75#include "ioconf.h"
76
77#define	_COMPONENT	ACPI_RESOURCE_COMPONENT
78ACPI_MODULE_NAME	("apei")
79
80/*
81 * apei_hest_ghes_handle(sc, src)
82 *
83 *	Check for, report, and acknowledge any error from a Generic
84 *	Hardware Error Source (GHES, not GHESv2).  Return true if there
85 *	was any error to report, false if not.
86 */
87static bool
88apei_hest_ghes_handle(struct apei_softc *sc, struct apei_source *src)
89{
90	ACPI_HEST_GENERIC *ghes = container_of(src->as_header,
91	    ACPI_HEST_GENERIC, Header);
92	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes.gesb;
93	char ctx[sizeof("error source 65535")];
94	uint32_t status;
95	bool fatal = false;
96
97	/*
98	 * Process and report any error.
99	 */
100	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
101	    ghes->Header.SourceId);
102	status = apei_gesb_report(sc, src->as_ghes.gesb,
103	    ghes->ErrorBlockLength, ctx, &fatal);
104
105	/*
106	 * Acknowledge the error by clearing the block status.  To
107	 * avoid races, we probably have to avoid further access to the
108	 * GESB until we get another notification.
109	 *
110	 * As a precaution, we zero this with atomic compare-and-swap
111	 * so at least we can see if the status changed while we were
112	 * working on it.
113	 *
114	 * It is tempting to clear bits with atomic and-complement, but
115	 * the BlockStatus is not just a bit mask -- bits [13:4] are a
116	 * count of Generic Error Data Entries, and who knows what bits
117	 * [31:14] might be used for in the future.
118	 *
119	 * XXX The GHES(v1) protocol is unclear from the specification
120	 * here.  The GHESv2 protocol has a separate register write to
121	 * acknowledge, which is a bit clearer.
122	 */
123	membar_release();
124	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
125	if (status1 != status) {
126		device_printf(sc->sc_dev, "%s: status changed from"
127		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
128		    ctx, status, status1);
129	}
130
131	/*
132	 * If the error was fatal, panic now.
133	 */
134	if (fatal)
135		panic("fatal hardware error");
136
137	return status != 0;
138}
139
140/*
141 * apei_hest_ghes_v2_handle(sc, src)
142 *
143 *	Check for, report, and acknowledge any error from a Generic
144 *	Hardware Error Source v2.  Return true if there was any error
145 *	to report, false if not.
146 */
147static bool
148apei_hest_ghes_v2_handle(struct apei_softc *sc, struct apei_source *src)
149{
150	ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header,
151	    ACPI_HEST_GENERIC_V2, Header);
152	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes.gesb;
153	char ctx[sizeof("error source 65535")];
154	uint64_t X;
155	uint32_t status;
156	bool fatal;
157
158	/*
159	 * Process and report any error.
160	 */
161	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
162	    ghes_v2->Header.SourceId);
163	status = apei_gesb_report(sc, src->as_ghes.gesb,
164	    ghes_v2->ErrorBlockLength, ctx, &fatal);
165
166	/*
167	 * First clear the block status.  As a precaution, we zero this
168	 * with atomic compare-and-swap so at least we can see if the
169	 * status changed while we were working on it.
170	 */
171	membar_release();
172	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
173	if (status1 != status) {
174		device_printf(sc->sc_dev, "%s: status changed from"
175		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
176		    ctx, status, status1);
177	}
178
179	/*
180	 * Next, do the Read Ack dance.
181	 *
182	 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
183	 */
184	X = apei_mapreg_read(&ghes_v2->ReadAckRegister,
185	    src->as_ghes_v2.read_ack);
186	X &= ghes_v2->ReadAckPreserve;
187	X |= ghes_v2->ReadAckWrite;
188	apei_mapreg_write(&ghes_v2->ReadAckRegister,
189	    src->as_ghes_v2.read_ack, X);
190
191	/*
192	 * If the error was fatal, panic now.
193	 */
194	if (fatal)
195		panic("fatal hardware error");
196
197	return status != 0;
198}
199
200/*
201 * apei_hest_ghes_poll(cookie)
202 *
203 *	Callout handler for periodic polling of a Generic Hardware
204 *	Error Source (GHES, not GHESv2), using Notification Type `0 -
205 *	Polled'.
206 *
207 *	cookie is the struct apei_source pointer for a single source;
208 *	if there are multiple sources there will be multiple callouts.
209 */
210static void
211apei_hest_ghes_poll(void *cookie)
212{
213	struct apei_source *src = cookie;
214	struct apei_softc *sc = src->as_sc;
215	ACPI_HEST_GENERIC *ghes = container_of(src->as_header,
216	    ACPI_HEST_GENERIC, Header);
217
218	/*
219	 * Process and acknowledge any error.
220	 */
221	(void)apei_hest_ghes_handle(sc, src);
222
223	/*
224	 * Schedule polling again after the firmware-suggested
225	 * interval.
226	 */
227	callout_schedule(&src->as_ch,
228	    MAX(1, mstohz(ghes->Notify.PollInterval)));
229}
230
231/*
232 * apei_hest_ghes_v2_poll(cookie)
233 *
234 *	Callout handler for periodic polling of a Generic Hardware
235 *	Error Source v2, using Notification Type `0 - Polled'.
236 *
237 *	cookie is the struct apei_source pointer for a single source;
238 *	if there are multiple sources there will be multiple callouts.
239 */
240static void
241apei_hest_ghes_v2_poll(void *cookie)
242{
243	struct apei_source *src = cookie;
244	struct apei_softc *sc = src->as_sc;
245	ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header,
246	    ACPI_HEST_GENERIC_V2, Header);
247
248	/*
249	 * Process and acknowledge any error.
250	 */
251	(void)apei_hest_ghes_v2_handle(sc, src);
252
253	/*
254	 * Schedule polling again after the firmware-suggested
255	 * interval.
256	 */
257	callout_schedule(&src->as_ch,
258	    MAX(1, mstohz(ghes_v2->Notify.PollInterval)));
259}
260
261#if defined(__i386__) || defined(__x86_64__)
262
/*
 * The NMI is (sometimes?) delivered to all CPUs at once.  To reduce
 * confusion, let's try to have only one CPU process error
 * notifications at a time.
 *
 * This lock is held by the GHES and GHESv2 NMI handlers below around
 * their calls into the error-handling routines.
 */
static __cpu_simple_lock_t apei_hest_nmi_lock;
269
270/*
271 * apei_hest_ghes_nmi(tf, cookie)
272 *
273 *	Nonmaskable interrupt handler for Generic Hardware Error
274 *	Sources (GHES, not GHESv2) with Notification Type `4 - NMI'.
275 */
276static int
277apei_hest_ghes_nmi(const struct trapframe *tf, void *cookie)
278{
279	struct apei_source *src = cookie;
280	struct apei_softc *sc = src->as_sc;
281
282	__cpu_simple_lock(&apei_hest_nmi_lock);
283	const bool mine = apei_hest_ghes_handle(sc, src);
284	__cpu_simple_unlock(&apei_hest_nmi_lock);
285
286	/*
287	 * Tell the NMI subsystem whether this interrupt could have
288	 * been for us or not.
289	 */
290	return mine;
291}
292
293/*
294 * apei_hest_ghes_v2_nmi(tf, cookie)
295 *
296 *	Nonmaskable interrupt handler for Generic Hardware Error
297 *	Sources v2 with Notification Type `4 - NMI'.
298 */
299static int
300apei_hest_ghes_v2_nmi(const struct trapframe *tf, void *cookie)
301{
302	struct apei_source *src = cookie;
303	struct apei_softc *sc = src->as_sc;
304
305	__cpu_simple_lock(&apei_hest_nmi_lock);
306	const bool mine = apei_hest_ghes_v2_handle(sc, src);
307	__cpu_simple_unlock(&apei_hest_nmi_lock);
308
309	/*
310	 * Tell the NMI subsystem whether this interrupt could have
311	 * been for us or not.
312	 */
313	return mine;
314}
315
316#endif	/* defined(__i386__) || defined(__x86_64__) */
317
318/*
319 * apei_hest_attach_ghes(sc, ghes, i)
320 *
321 *	Attach a Generic Hardware Error Source (GHES, not GHESv2) as
322 *	the ith source in the Hardware Error Source Table.
323 *
324 *	After this point, the system will check for and handle errors
325 *	when notified by this source.
326 */
327static void
328apei_hest_attach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes,
329    uint32_t i)
330{
331	struct apei_hest_softc *hsc = &sc->sc_hest;
332	struct apei_source *src = &hsc->hsc_source[i];
333	uint64_t addr;
334	ACPI_STATUS rv;
335	char ctx[sizeof("HEST[4294967295, Id=65535]")];
336
337	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
338	    i, ghes->Header.SourceId);
339
340	/*
341	 * Verify the source is enabled before proceeding.  The Enabled
342	 * field is 8 bits with 256 possibilities, but only two of the
343	 * possibilities, 0 and 1, have semantics defined in the spec,
344	 * so out of an abundance of caution let's tread carefully in
345	 * case anything changes and noisily reject any values other
346	 * than 1.
347	 */
348	switch (ghes->Enabled) {
349	case 1:
350		break;
351	case 0:
352		aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx);
353		return;
354	default:
355		aprint_error_dev(sc->sc_dev, "%s: unknown GHES Enabled state:"
356		    " 0x%"PRIx8"\n", ctx, ghes->Enabled);
357		return;
358	}
359
360	/*
361	 * Verify the Error Status Address bit width is at most 64 bits
362	 * before proceeding with this source.  When we get 128-bit
363	 * addressing, this code will have to be updated.
364	 */
365	if (ghes->ErrorStatusAddress.BitWidth > 64) {
366		aprint_error_dev(sc->sc_dev, "%s: excessive address bits:"
367		    " %"PRIu8"\n", ctx, ghes->ErrorStatusAddress.BitWidth);
368		return;
369	}
370
371	/*
372	 * Read the GHES Error Status Addresss.  This is the physical
373	 * address of a GESB, Generic Error Status Block.  Why the
374	 * physical address is exposed via this indirection, and not
375	 * simply stored directly in the GHES, is unclear to me.
376	 * Hoping it's not because the address can change dynamically,
377	 * because the error handling path shouldn't involve mapping
378	 * anything.
379	 */
380	rv = AcpiRead(&addr, &ghes->ErrorStatusAddress);
381	if (ACPI_FAILURE(rv)) {
382		aprint_error_dev(sc->sc_dev, "%s:"
383		    " failed to read error status address: %s", ctx,
384		    AcpiFormatException(rv));
385		return;
386	}
387	aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx,
388	    addr);
389
390	/*
391	 * Initialize the source and map the GESB so we can get at it
392	 * in the error handling path.
393	 */
394	src->as_sc = sc;
395	src->as_header = &ghes->Header;
396	src->as_ghes.gesb = AcpiOsMapMemory(addr, ghes->ErrorBlockLength);
397
398	/*
399	 * Arrange to receive notifications.
400	 */
401	switch (ghes->Notify.Type) {
402	case ACPI_HEST_NOTIFY_POLLED:
403		callout_init(&src->as_ch, CALLOUT_MPSAFE);
404		callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src);
405		callout_schedule(&src->as_ch, 0);
406		break;
407	case ACPI_HEST_NOTIFY_SCI:
408	case ACPI_HEST_NOTIFY_GPIO:
409		/*
410		 * SCI and GPIO notifications are delivered through
411		 * Hardware Error Device (PNP0C33) events.
412		 *
413		 * XXX Where is this spelled out?  The text at
414		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
415		 * is vague.
416		 */
417		SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry);
418		break;
419#if defined(__i386__) || defined(__x86_64__)
420	case ACPI_HEST_NOTIFY_NMI:
421		src->as_nmi = nmi_establish(&apei_hest_ghes_nmi, src);
422		break;
423#endif
424	}
425
426	/*
427	 * Now that we have notification set up, process and
428	 * acknowledge the initial GESB report if any.
429	 */
430	apei_hest_ghes_handle(sc, src);
431}
432
433/*
434 * apei_hest_detach_ghes(sc, ghes, i)
435 *
436 *	Detach the ith source, which is a Generic Hardware Error Source
437 *	(GHES, not GHESv2).
438 *
439 *	After this point, the system will ignore notifications from
440 *	this source.
441 */
442static void
443apei_hest_detach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes,
444    uint32_t i)
445{
446	struct apei_hest_softc *hsc = &sc->sc_hest;
447	struct apei_source *src = &hsc->hsc_source[i];
448
449	/*
450	 * Arrange to stop receiving notifications.
451	 */
452	switch (ghes->Notify.Type) {
453	case ACPI_HEST_NOTIFY_POLLED:
454		callout_halt(&src->as_ch, NULL);
455		callout_destroy(&src->as_ch);
456		break;
457	case ACPI_HEST_NOTIFY_SCI:
458	case ACPI_HEST_NOTIFY_GPIO:
459		/*
460		 * No need to spend time removing the entry; no further
461		 * calls via apei_hed_notify are possible at this
462		 * point, now that detach has begun.
463		 */
464		break;
465#if defined(__i386__) || defined(__x86_64__)
466	case ACPI_HEST_NOTIFY_NMI:
467		nmi_disestablish(src->as_nmi);
468		src->as_nmi = NULL;
469		break;
470#endif
471	}
472
473	/*
474	 * No more notifications.  Unmap the GESB and destroy the
475	 * interrupt source now that it will no longer be used in
476	 * error handling path.
477	 */
478	AcpiOsUnmapMemory(src->as_ghes.gesb, ghes->ErrorBlockLength);
479	src->as_ghes.gesb = NULL;
480	src->as_header = NULL;
481	src->as_sc = NULL;
482}
483
484
485/*
486 * apei_hest_attach_ghes_v2(sc, ghes_v2, i)
487 *
488 *	Attach a Generic Hardware Error Source v2 as the ith source in
489 *	the Hardware Error Source Table.
490 *
491 *	After this point, the system will check for and handle errors
492 *	when notified by this source.
493 */
494static void
495apei_hest_attach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2,
496    uint32_t i)
497{
498	struct apei_hest_softc *hsc = &sc->sc_hest;
499	struct apei_source *src = &hsc->hsc_source[i];
500	uint64_t addr;
501	struct apei_mapreg *read_ack;
502	ACPI_STATUS rv;
503	char ctx[sizeof("HEST[4294967295, Id=65535]")];
504
505	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
506	    i, ghes_v2->Header.SourceId);
507
508	/*
509	 * Verify the source is enabled before proceeding.  The Enabled
510	 * field is 8 bits with 256 possibilities, but only two of the
511	 * possibilities, 0 and 1, have semantics defined in the spec,
512	 * so out of an abundance of caution let's tread carefully in
513	 * case anything changes and noisily reject any values other
514	 * than 1.
515	 */
516	switch (ghes_v2->Enabled) {
517	case 1:
518		break;
519	case 0:
520		aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx);
521		return;
522	default:
523		aprint_error_dev(sc->sc_dev, "%s:"
524		    " unknown GHESv2 Enabled state: 0x%"PRIx8"\n", ctx,
525		    ghes_v2->Enabled);
526		return;
527	}
528
529	/*
530	 * Verify the Error Status Address bit width is at most 64 bits
531	 * before proceeding with this source.  When we get 128-bit
532	 * addressing, this code will have to be updated.
533	 */
534	if (ghes_v2->ErrorStatusAddress.BitWidth > 64) {
535		aprint_error_dev(sc->sc_dev, "%s: excessive address bits:"
536		    " %"PRIu8"\n", ctx, ghes_v2->ErrorStatusAddress.BitWidth);
537		return;
538	}
539
540	/*
541	 * Read the GHESv2 Error Status Addresss.  This is the physical
542	 * address of a GESB, Generic Error Status Block.  Why the
543	 * physical address is exposed via this indirection, and not
544	 * simply stored directly in the GHESv2, is unclear to me.
545	 * Hoping it's not because the address can change dynamically,
546	 * because the error handling path shouldn't involve mapping
547	 * anything.
548	 */
549	rv = AcpiRead(&addr, &ghes_v2->ErrorStatusAddress);
550	if (ACPI_FAILURE(rv)) {
551		aprint_error_dev(sc->sc_dev, "%s:"
552		    " failed to read error status address: %s", ctx,
553		    AcpiFormatException(rv));
554		return;
555	}
556	aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx,
557	    addr);
558
559	/*
560	 * Try to map the Read Ack register up front, so we don't have
561	 * to allocate and free kva in AcpiRead/AcpiWrite at the time
562	 * we're handling an error.  Bail if we can't.
563	 */
564	read_ack = apei_mapreg_map(&ghes_v2->ReadAckRegister);
565	if (read_ack == NULL) {
566		aprint_error_dev(sc->sc_dev, "%s:"
567		    " unable to map Read Ack register\n", ctx);
568		return;
569	}
570
571	/*
572	 * Initialize the source and map the GESB it in the error
573	 * handling path.
574	 */
575	src->as_sc = sc;
576	src->as_header = &ghes_v2->Header;
577	src->as_ghes_v2.gesb = AcpiOsMapMemory(addr,
578	    ghes_v2->ErrorBlockLength);
579	src->as_ghes_v2.read_ack = read_ack;
580
581	/*
582	 * Arrange to receive notifications.
583	 */
584	switch (ghes_v2->Notify.Type) {
585	case ACPI_HEST_NOTIFY_POLLED:
586		callout_init(&src->as_ch, CALLOUT_MPSAFE);
587		callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src);
588		callout_schedule(&src->as_ch, 0);
589		break;
590	case ACPI_HEST_NOTIFY_SCI:
591	case ACPI_HEST_NOTIFY_GPIO:
592		/*
593		 * SCI and GPIO notifications are delivered through
594		 * Hardware Error Device (PNP0C33) events.
595		 *
596		 * XXX Where is this spelled out?  The text at
597		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
598		 * is vague.
599		 */
600		SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry);
601		break;
602#if defined(__i386__) || defined(__x86_64__)
603	case ACPI_HEST_NOTIFY_NMI:
604		src->as_nmi = nmi_establish(&apei_hest_ghes_v2_nmi, src);
605		break;
606#endif
607	}
608
609	/*
610	 * Now that we have notification set up, process and
611	 * acknowledge the initial GESB report if any.
612	 */
613	apei_hest_ghes_handle(sc, src);
614}
615
616/*
617 * apei_hest_detach_ghes_v2(sc, ghes_v2, i)
618 *
619 *	Detach the ith source, which is a Generic Hardware Error Source
620 *	v2.
621 *
622 *	After this point, the system will ignore notifications from
623 *	this source.
624 */
625static void
626apei_hest_detach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2,
627    uint32_t i)
628{
629	struct apei_hest_softc *hsc = &sc->sc_hest;
630	struct apei_source *src = &hsc->hsc_source[i];
631
632	/*
633	 * Arrange to stop receiving notifications.
634	 */
635	switch (ghes_v2->Notify.Type) {
636	case ACPI_HEST_NOTIFY_POLLED:
637		callout_halt(&src->as_ch, NULL);
638		callout_destroy(&src->as_ch);
639		break;
640	case ACPI_HEST_NOTIFY_SCI:
641	case ACPI_HEST_NOTIFY_GPIO:
642		/*
643		 * No need to spend time removing the entry; no further
644		 * calls via apei_hed_notify are possible at this
645		 * point, now that detach has begun.
646		 */
647		break;
648#if defined(__i386__) || defined(__x86_64__)
649	case ACPI_HEST_NOTIFY_NMI:
650		nmi_disestablish(src->as_nmi);
651		src->as_nmi = NULL;
652		break;
653#endif
654	}
655
656	/*
657	 * No more notifications.  Unmap the GESB and read ack register
658	 * now that it will no longer be used in error handling path.
659	 */
660	AcpiOsUnmapMemory(src->as_ghes_v2.gesb, ghes_v2->ErrorBlockLength);
661	src->as_ghes_v2.gesb = NULL;
662	apei_mapreg_unmap(&ghes_v2->ReadAckRegister, src->as_ghes_v2.read_ack);
663	src->as_ghes_v2.read_ack = NULL;
664	src->as_header = NULL;
665	src->as_sc = NULL;
666}
667
668/*
669 * apei_hest_attach_source(sc, header, i, size_t maxlen)
670 *
671 *	Attach the ith source in the Hardware Error Source Table given
672 *	its header, and return a pointer to the header of the next
673 *	source in the table, provided it is no more than maxlen bytes
674 *	past header.  Return NULL if the size of the source is unknown
675 *	or would exceed maxlen bytes.
676 */
677static ACPI_HEST_HEADER *
678apei_hest_attach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header,
679    uint32_t i, size_t maxlen)
680{
681	char ctx[sizeof("HEST[4294967295, Id=65535]")];
682
683	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
684	    i, header->SourceId);
685
686	switch (header->Type) {
687	case ACPI_HEST_TYPE_IA32_CHECK: {
688		ACPI_HEST_IA_MACHINE_CHECK *const imc = container_of(header,
689		    ACPI_HEST_IA_MACHINE_CHECK, Header);
690
691		aprint_error_dev(sc->sc_dev, "%s:"
692		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
693
694		if (maxlen < sizeof(*imc))
695			return NULL;
696		maxlen -= sizeof(*imc);
697		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imc + 1);
698		if (maxlen < imc->NumHardwareBanks*sizeof(*bank))
699			return NULL;
700		return (ACPI_HEST_HEADER *)(bank + imc->NumHardwareBanks);
701	}
702	case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: {
703		ACPI_HEST_IA_CORRECTED *const imcc = container_of(header,
704		    ACPI_HEST_IA_CORRECTED, Header);
705
706		aprint_error_dev(sc->sc_dev, "%s:"
707		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
708
709		if (maxlen < sizeof(*imcc))
710			return NULL;
711		maxlen -= sizeof(*imcc);
712		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imcc + 1);
713		if (maxlen < imcc->NumHardwareBanks*sizeof(*bank))
714			return NULL;
715		return (ACPI_HEST_HEADER *)(bank + imcc->NumHardwareBanks);
716	}
717	case ACPI_HEST_TYPE_IA32_NMI: {
718		ACPI_HEST_IA_NMI *const ianmi = container_of(header,
719		    ACPI_HEST_IA_NMI, Header);
720
721		aprint_error_dev(sc->sc_dev, "%s:"
722		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
723
724		if (maxlen < sizeof(*ianmi))
725			return NULL;
726		return (ACPI_HEST_HEADER *)(ianmi + 1);
727	}
728	case ACPI_HEST_TYPE_AER_ROOT_PORT: {
729		ACPI_HEST_AER_ROOT *const aerroot = container_of(header,
730		    ACPI_HEST_AER_ROOT, Header);
731
732		aprint_error_dev(sc->sc_dev, "%s:"
733		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
734
735		if (maxlen < sizeof(*aerroot))
736			return NULL;
737		return (ACPI_HEST_HEADER *)(aerroot + 1);
738	}
739	case ACPI_HEST_TYPE_AER_ENDPOINT: {
740		ACPI_HEST_AER *const aer = container_of(header,
741		    ACPI_HEST_AER, Header);
742
743		aprint_error_dev(sc->sc_dev, "%s:"
744		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
745
746		if (maxlen < sizeof(*aer))
747			return NULL;
748		return (ACPI_HEST_HEADER *)(aer + 1);
749	}
750	case ACPI_HEST_TYPE_AER_BRIDGE: {
751		ACPI_HEST_AER_BRIDGE *const aerbridge = container_of(header,
752		    ACPI_HEST_AER_BRIDGE, Header);
753
754		aprint_error_dev(sc->sc_dev, "%s:"
755		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
756
757		if (maxlen < sizeof(*aerbridge))
758			return NULL;
759		return (ACPI_HEST_HEADER *)(aerbridge + 1);
760	}
761	case ACPI_HEST_TYPE_GENERIC_ERROR: {
762		ACPI_HEST_GENERIC *const ghes = container_of(header,
763		    ACPI_HEST_GENERIC, Header);
764
765		if (maxlen < sizeof(*ghes))
766			return NULL;
767		apei_hest_attach_ghes(sc, ghes, i);
768		return (ACPI_HEST_HEADER *)(ghes + 1);
769	}
770	case ACPI_HEST_TYPE_GENERIC_ERROR_V2: {
771		ACPI_HEST_GENERIC_V2 *const ghes_v2 = container_of(header,
772		    ACPI_HEST_GENERIC_V2, Header);
773
774		if (maxlen < sizeof(*ghes_v2))
775			return NULL;
776		apei_hest_attach_ghes_v2(sc, ghes_v2, i);
777		return (ACPI_HEST_HEADER *)(ghes_v2 + 1);
778	}
779	case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: {
780		ACPI_HEST_IA_DEFERRED_CHECK *const imdc = container_of(header,
781		    ACPI_HEST_IA_DEFERRED_CHECK, Header);
782
783		aprint_error_dev(sc->sc_dev, "%s:"
784		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
785
786		if (maxlen < sizeof(*imdc))
787			return NULL;
788		maxlen -= sizeof(*imdc);
789		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imdc + 1);
790		if (maxlen < imdc->NumHardwareBanks*sizeof(*bank))
791			return NULL;
792		return (ACPI_HEST_HEADER *)(bank + imdc->NumHardwareBanks);
793	}
794	case ACPI_HEST_TYPE_NOT_USED3:
795	case ACPI_HEST_TYPE_NOT_USED4:
796	case ACPI_HEST_TYPE_NOT_USED5:
797	default:
798		aprint_error_dev(sc->sc_dev, "%s: unknown type:"
799		    " 0x%04"PRIx16"\n", ctx, header->Type);
800		if (header->Type >= 12) {
801			/*
802			 * `Beginning with error source type 12 and
803			 *  onward, each Error Source Structure must
804			 *  use the standard Error Source Structure
805			 *  Header as defined below.'
806			 *
807			 * Not yet in acpica, though, so we copy this
808			 * down manually.
809			 */
810			struct {
811				UINT16	Type;
812				UINT16	Length;
813			} *const essh = (void *)header;
814
815			if (maxlen < sizeof(*essh) || maxlen < essh->Length)
816				return NULL;
817			return (ACPI_HEST_HEADER *)((char *)header +
818			    essh->Length);
819		}
820		return NULL;
821	}
822}
823
824/*
825 * apei_hest_detach_source(sc, header, i)
826 *
827 *	Detach the ith source in the Hardware Error Status Table.
828 *	Caller is assumed to have stored where each source's header is,
829 *	so no need to return the pointer to the header of the next
830 *	source in the table.
831 */
832static void
833apei_hest_detach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header,
834    uint32_t i)
835{
836
837	switch (header->Type) {
838	case ACPI_HEST_TYPE_GENERIC_ERROR: {
839		ACPI_HEST_GENERIC *ghes = container_of(header,
840		    ACPI_HEST_GENERIC, Header);
841
842		apei_hest_detach_ghes(sc, ghes, i);
843		break;
844	}
845	case ACPI_HEST_TYPE_GENERIC_ERROR_V2: {
846		ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(header,
847		    ACPI_HEST_GENERIC_V2, Header);
848
849		apei_hest_detach_ghes_v2(sc, ghes_v2, i);
850		break;
851	}
852	case ACPI_HEST_TYPE_IA32_CHECK:
853	case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK:
854	case ACPI_HEST_TYPE_IA32_NMI:
855	case ACPI_HEST_TYPE_NOT_USED3:
856	case ACPI_HEST_TYPE_NOT_USED4:
857	case ACPI_HEST_TYPE_NOT_USED5:
858	case ACPI_HEST_TYPE_AER_ROOT_PORT:
859	case ACPI_HEST_TYPE_AER_ENDPOINT:
860	case ACPI_HEST_TYPE_AER_BRIDGE:
861	case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK:
862	default:
863		/* XXX shouldn't happen */
864		break;
865	}
866}
867
868/*
869 * apei_hest_attach(sc)
870 *
871 *	Scan the Hardware Error Source Table and attach sources
872 *	enumerated in it so we can receive and process hardware errors
873 *	during operation.
874 */
875void
876apei_hest_attach(struct apei_softc *sc)
877{
878	ACPI_TABLE_HEST *hest = sc->sc_tab.hest;
879	struct apei_hest_softc *hsc = &sc->sc_hest;
880	ACPI_HEST_HEADER *header, *next;
881	uint32_t i, n;
882	size_t resid;
883
884	/*
885	 * Initialize the HED (Hardware Error Device, PNP0C33)
886	 * notification list so apei_hed_notify becomes a noop with no
887	 * extra effort even if we fail to attach anything.
888	 */
889	SIMPLEQ_INIT(&hsc->hsc_hed_list);
890
891	/*
892	 * Verify the table is large enough.
893	 */
894	if (hest->Header.Length < sizeof(*hest)) {
895		aprint_error_dev(sc->sc_dev, "HEST: truncated table:"
896		    " %"PRIu32" < %zu minimum bytes\n",
897		    hest->Header.Length, sizeof(*hest));
898		return;
899	}
900
901	n = hest->ErrorSourceCount;
902	aprint_normal_dev(sc->sc_dev, "HEST: %"PRIu32
903	    " hardware error source%s\n", n, n == 1 ? "" : "s");
904
905	/*
906	 * This could be SIZE_MAX but let's put a smaller arbitrary
907	 * limit on it; if you have gigabytes of HEST something is
908	 * probably wrong.
909	 */
910	if (n > INT32_MAX/sizeof(hsc->hsc_source[0])) {
911		aprint_error_dev(sc->sc_dev, "HEST: too many error sources\n");
912		return;
913	}
914	hsc->hsc_source = kmem_zalloc(n * sizeof(hsc->hsc_source[0]),
915	    KM_SLEEP);
916
917	header = (ACPI_HEST_HEADER *)(hest + 1);
918	resid = hest->Header.Length - sizeof(*hest);
919	for (i = 0; i < n && resid; i++, header = next) {
920		next = apei_hest_attach_source(sc, header, i, resid);
921		if (next == NULL) {
922			aprint_error_dev(sc->sc_dev, "truncated source:"
923			    " %"PRIu32"\n", i);
924			break;
925		}
926		KASSERT(header < next);
927		KASSERT((size_t)((const char *)next - (const char *)header) <=
928		    resid);
929		resid -= (const char *)next - (const char *)header;
930	}
931	if (resid) {
932		aprint_error_dev(sc->sc_dev, "HEST:"
933		    " %zu bytes of trailing garbage after %"PRIu32" entries\n",
934		    resid, n);
935	}
936}
937
938/*
939 * apei_hest_detach(sc)
940 *
941 *	Stop receiving and processing hardware error notifications and
942 *	free resources set up from the Hardware Error Source Table.
943 */
944void
945apei_hest_detach(struct apei_softc *sc)
946{
947	ACPI_TABLE_HEST *hest = sc->sc_tab.hest;
948	struct apei_hest_softc *hsc = &sc->sc_hest;
949	uint32_t i, n;
950
951	if (hsc->hsc_source) {
952		n = hest->ErrorSourceCount;
953		for (i = 0; i < n; i++) {
954			struct apei_source *src = &hsc->hsc_source[i];
955			ACPI_HEST_HEADER *header = src->as_header;
956
957			if (src->as_header == NULL)
958				continue;
959			apei_hest_detach_source(sc, header, i);
960		}
961		kmem_free(hsc->hsc_source, n * sizeof(hsc->hsc_source[0]));
962		hsc->hsc_source = NULL;
963	}
964}
965
966void
967apei_hed_notify(void)
968{
969	device_t apei0;
970	struct apei_softc *sc;
971	struct apei_hest_softc *hsc;
972	struct apei_source *src;
973
974	/*
975	 * Take a reference to the apei0 device so it doesn't go away
976	 * while we're working.
977	 */
978	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL)
979		goto out;
980	sc = device_private(apei0);
981
982	/*
983	 * If there's no HEST, nothing to do.
984	 */
985	if (sc->sc_tab.hest == NULL)
986		goto out;
987	hsc = &sc->sc_hest;
988
989	/*
990	 * Walk through the HED-notified hardware error sources and
991	 * check them.  The list is stable until we release apei0.
992	 */
993	SIMPLEQ_FOREACH(src, &hsc->hsc_hed_list, as_entry) {
994		ACPI_HEST_HEADER *const header = src->as_header;
995
996		switch (header->Type) {
997		case ACPI_HEST_TYPE_GENERIC_ERROR:
998			apei_hest_ghes_handle(sc, src);
999			break;
1000		case ACPI_HEST_TYPE_GENERIC_ERROR_V2:
1001			apei_hest_ghes_v2_handle(sc, src);
1002			break;
1003		case ACPI_HEST_TYPE_IA32_CHECK:
1004		case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK:
1005		case ACPI_HEST_TYPE_IA32_NMI:
1006		case ACPI_HEST_TYPE_NOT_USED3:
1007		case ACPI_HEST_TYPE_NOT_USED4:
1008		case ACPI_HEST_TYPE_NOT_USED5:
1009		case ACPI_HEST_TYPE_AER_ROOT_PORT:
1010		case ACPI_HEST_TYPE_AER_ENDPOINT:
1011		case ACPI_HEST_TYPE_AER_BRIDGE:
1012//		case ACPI_HEST_TYPE_GENERIC_ERROR:
1013//		case ACPI_HEST_TYPE_GENERIC_ERROR_V2:
1014		case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK:
1015		default:
1016			/* XXX shouldn't happen */
1017			break;
1018		}
1019	}
1020
1021out:	if (apei0) {
1022		device_release(apei0);
1023		apei0 = NULL;
1024	}
1025}
1026