1/*	$OpenBSD: amd64errata.c,v 1.13 2022/10/10 03:01:11 jsg Exp $	*/
2/*	$NetBSD: errata.c,v 1.6 2007/02/05 21:05:45 ad Exp $	*/
3
4/*-
5 * Copyright (c) 2007 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Detect, report on, and work around known errata with AMD's amd64 CPUs.
35 *
36 * This is generalised because there are quite a few problems that the
37 * BIOS can patch via MSR, but it is not known if the OS can patch these
38 * yet.  The list is expected to grow over time.
39 *
40 * The data here is from:
41 *
42 * Revision Guide for AMD Athlon 64 and AMD Opteron Processors (0Fh)
43 * Publication #25759, Revision: 3.79, Issue Date: July 2009
44 * BH-E4, CH-CG, CH-D0, DH-CG, DH-D0, DH-E3, DH-E6, JH-E1, JH-E6, SH-B0,
45 * SH-B3, SH-C0, SH-CG, SH-D0, SH-E4, SH-E5
46 *
47 * Revision Guide for AMD Family 10h Processors
48 * Publication #41322, Revision: 3.92, Issue Date: March 2012
49 * BL-C2, BL-C3, DA-C2, DA-C3, DR-B2, DR-B3, DR-BA, HY-D0, HY-D1,
50 * HY-D1-G34R1, PH-E0, RB-C2, RB-C3
51 *
52 * Revision Guide for AMD Family 12h Processors
53 * Publication #44739, Revision: 3.10, Issue Date: March 2012
54 * LN-B0
55 */
56
57#include <sys/param.h>
58#include <sys/systm.h>
59
60#include <machine/cpu.h>
61#include <machine/cpufunc.h>
62#include <machine/specialreg.h>
63
/*
 * One row of the errata table.  The table below uses positional
 * initializers, so the order of the members must not change.
 */
typedef struct errata {
	/* Erratum number; this is the value printed in the reports. */
	u_short		e_num;
	/*
	 * Result for the CPU currently being examined:
	 * 0 = not present, 1 = present, 2 = present and patched.
	 */
	u_short		e_reported;
	/* MSR number that the MSR action routines read (and write). */
	u_int		e_data1;
	/*
	 * Affected CPU revisions (cpurev_t values), terminated by OINK.
	 * NULL means the entry is considered for every revision.
	 */
	const uint8_t	*e_set;
	/*
	 * Action routine; returns the same 0/1/2 codes as e_reported.
	 * NULL means the erratum is assumed present without any test.
	 */
	int		(*e_act)(struct cpu_info *, struct errata *);
	/*
	 * Bit mask within the e_data1 MSR.  The action routines treat a
	 * set bit as "workaround already in place".
	 */
	uint64_t	e_data2;
} errata_t;
72
/*
 * CPU revision identifiers, named after the revisions listed in the
 * revision guides cited at the top of this file.  The values are stored
 * in uint8_t arrays (the amd64_errata_set* tables), so they must stay
 * below 256.
 *
 * OINK is not a revision: it is the sentinel that terminates cpurevs[]
 * and every errata set, and therefore has to remain the last enumerator.
 */
typedef enum cpurev {
	BH_E4, CH_CG, CH_D0, DH_CG, DH_D0, DH_E3, DH_E6, JH_E1,
	JH_E6, SH_B0, SH_B3, SH_C0, SH_CG, SH_D0, SH_E4, SH_E5,
	DR_BA, DR_B2, DR_B3, RB_C2, RB_C3, BL_C2, BL_C3, DA_C2,
	DA_C3, HY_D0, HY_D1, PH_E0, LN_B0,
	OINK
} cpurev_t;
80
81static const u_int cpurevs[] = {
82	BH_E4, 0x0020fb1, CH_CG, 0x0000f82, CH_CG, 0x0000fb2,
83	CH_D0, 0x0010f80, CH_D0, 0x0010fb0, DH_CG, 0x0000fc0,
84	DH_CG, 0x0000fe0, DH_CG, 0x0000ff0, DH_D0, 0x0010fc0,
85	DH_D0, 0x0010ff0, DH_E3, 0x0020fc0, DH_E3, 0x0020ff0,
86	DH_E6, 0x0020fc2, DH_E6, 0x0020ff2, JH_E1, 0x0020f10,
87	JH_E6, 0x0020f12, JH_E6, 0x0020f32, SH_B0, 0x0000f40,
88	SH_B3, 0x0000f51, SH_C0, 0x0000f48, SH_C0, 0x0000f58,
89	SH_CG, 0x0000f4a, SH_CG, 0x0000f5a, SH_CG, 0x0000f7a,
90	SH_D0, 0x0010f40, SH_D0, 0x0010f50, SH_D0, 0x0010f70,
91	SH_E4, 0x0020f51, SH_E4, 0x0020f71, SH_E5, 0x0020f42,
92	DR_BA, 0x0100f2a, DR_B2, 0x0100f22, DR_B3, 0x0100f23,
93	RB_C2, 0x0100f42, RB_C3, 0x0100f43, BL_C2, 0x0100f52,
94	BL_C3, 0x0100f53, DA_C2, 0x0100f62, DA_C3, 0x0100f63,
95	HY_D0, 0x0100f80, HY_D1, 0x0100f81, HY_D1, 0x0100f91,
96	PH_E0, 0x0100fa0, LN_B0, 0x0300f10, SH_B0, 0x0000f50,
97	OINK
98};
99
/*
 * Revision sets.  Each array lists the cpurev_t values an erratum applies
 * to and is terminated by OINK; the errata table points at these.
 */

/* Used by errata 86 and 94. */
static const uint8_t amd64_errata_set1[] = {
	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
};

#ifdef MULTIPROCESSOR
/* Used by errata 101, 106 and 107 (MULTIPROCESSOR kernels only). */
static const uint8_t amd64_errata_set2[] = {
	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
};
#endif

/* Used by erratum 113. */
static const uint8_t amd64_errata_set3[] = {
	JH_E1, DH_E3, OINK
};

#if 0
/* Used by erratum 122, whose table entry is compiled out as well. */
static const uint8_t amd64_errata_set4[] = {
	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
	DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
};
#endif

/* Used by erratum 81 and, on MULTIPROCESSOR kernels, erratum 69. */
static const uint8_t amd64_errata_set5[] = {
	SH_B3, OINK
};

/* Used by erratum 97. */
static const uint8_t amd64_errata_set6[] = {
	SH_C0, SH_CG, DH_CG, CH_CG, OINK
};

/* Used by erratum 104. */
static const uint8_t amd64_errata_set7[] = {
	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
};

/*
 * Used by erratum 89.  Several revisions are listed more than once; the
 * lookup stops at the first match, so the repeats have no effect.
 */
static const uint8_t amd64_errata_set8[] = {
	BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
	DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
	JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
	SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
};

/* Used by erratum 721. */
static const uint8_t amd64_errata_set9[] = {
	DR_BA, DR_B2, DR_B3, RB_C2, RB_C3, BL_C2, BL_C3, DA_C2,
	DA_C3, HY_D0, HY_D1, PH_E0, LN_B0, OINK
};
144
/*
 * Action routines referenced from the errata table.  Both return
 * 0 when the erratum is not present; amd64_errata_testmsr() returns 1
 * when it is present, amd64_errata_setmsr() returns 2 after it has
 * written the workaround bits.
 */
int amd64_errata_setmsr(struct cpu_info *, errata_t *);
int amd64_errata_testmsr(struct cpu_info *, errata_t *);
147
148static errata_t errata[] = {
149	/*
150	 * 81: Cache Coherency Problem with Hardware Prefetching
151	 * and Streaming Stores
152	 */
153	{
154		81, 0, MSR_DC_CFG, amd64_errata_set5,
155		amd64_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF
156	},
157	/*
158	 * 86: DRAM Data Masking Feature Can Cause ECC Failures
159	 */
160	{
161		86, 0, MSR_NB_CFG, amd64_errata_set1,
162		amd64_errata_testmsr, NB_CFG_DISDATMSK
163	},
164	/*
165	 * 89: Potential Deadlock With Locked Transactions
166	 */
167	{
168		89, 0, MSR_NB_CFG, amd64_errata_set8,
169		amd64_errata_testmsr, NB_CFG_DISIOREQLOCK
170	},
171	/*
172	 * 94: Sequential Prefetch Feature May Cause Incorrect
173	 * Processor Operation
174	 */
175	{
176		94, 0, MSR_IC_CFG, amd64_errata_set1,
177		amd64_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH
178	},
179	/*
180	 * 97: 128-Bit Streaming Stores May Cause Coherency
181	 * Failure
182	 *
183	 * XXX "This workaround must not be applied to processors
184	 * prior to revision C0."  We don't apply it, but if it
185	 * can't be applied, it shouldn't be reported.
186	 */
187	{
188		97, 0, MSR_DC_CFG, amd64_errata_set6,
189		amd64_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO
190	},
191	/*
192	 * 104: DRAM Data Masking Feature Causes ChipKill ECC
193	 * Failures When Enabled With x8/x16 DRAM Devices
194	 */
195	{
196		104, 0, MSR_NB_CFG, amd64_errata_set7,
197		amd64_errata_testmsr, NB_CFG_DISDATMSK
198	},
199	/*
200	 * 113: Enhanced Write-Combining Feature Causes System Hang
201	 */
202	{
203		113, 0, MSR_BU_CFG, amd64_errata_set3,
204		amd64_errata_setmsr, BU_CFG_WBENHWSBDIS
205	},
206#ifdef MULTIPROCESSOR
207	/*
208	 * 69: Multiprocessor Coherency Problem with Hardware
209	 * Prefetch Mechanism
210	 */
211	{
212		69, 0, MSR_BU_CFG, amd64_errata_set5,
213		amd64_errata_setmsr, BU_CFG_WBPFSMCCHKDIS
214	},
215	/*
216	 * 101: DRAM Scrubber May Cause Data Corruption When Using
217	 * Node-Interleaved Memory
218	 */
219	{
220		101, 0, 0, amd64_errata_set2,
221		NULL, 0
222	},
223	/*
224	 * 106: Potential Deadlock with Tightly Coupled Semaphores
225	 * in an MP System
226	 */
227	{
228		106, 0, MSR_LS_CFG, amd64_errata_set2,
229		amd64_errata_testmsr, LS_CFG_DIS_LS2_SQUISH
230	},
231	/*
232	 * 107: Possible Multiprocessor Coherency Problem with
233	 * Setting Page Table A/D Bits
234	 */
235	{
236		107, 0, MSR_BU_CFG, amd64_errata_set2,
237		amd64_errata_testmsr, BU_CFG_THRL2IDXCMPDIS
238	},
239#if 0
240	/*
241	 * 122: TLB Flush Filter May Cause Coherency Problem in
242	 * Multiprocessor Systems
243	 */
244	{
245		122, 0, MSR_HWCR, amd64_errata_set4,
246		amd64_errata_setmsr, HWCR_FFDIS
247	},
248#endif
249#endif	/* MULTIPROCESSOR */
250	/*
251	 * 721: Processor May Incorrectly Update Stack Pointer
252	 */
253	{
254		721, 0, MSR_DE_CFG, amd64_errata_set9,
255		amd64_errata_setmsr, DE_CFG_721
256	},
257};
258
259int
260amd64_errata_testmsr(struct cpu_info *ci, errata_t *e)
261{
262	uint64_t val;
263
264	(void)ci;
265
266	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
267	if ((val & e->e_data2) != 0)
268		return 0;		/* not found */
269
270	e->e_reported = 1;
271	return 1;			/* found */
272}
273
274int
275amd64_errata_setmsr(struct cpu_info *ci, errata_t *e)
276{
277	uint64_t val;
278
279	(void)ci;
280
281	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
282	if ((val & e->e_data2) != 0)
283		return 0;		/* not found */
284
285	wrmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE, val | e->e_data2);
286
287#ifdef ERRATA_DEBUG
288	printf("ERRATA: writing a fix\n");
289	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
290	if ((val & e->e_data2) != 0)
291		printf("ERRATA: fix seems to have worked!\n");
292#endif
293
294	e->e_reported = 1;
295	return 2;			/* found and fixed */
296}
297
298void
299amd64_errata(struct cpu_info *ci)
300{
301	u_int32_t code, dummy;
302	errata_t *e, *ex;
303	cpurev_t rev;
304	int i, j;
305	int rc;
306	int found = 0;
307	int corrected = 0;
308	static int printed = 0;
309
310	CPUID(0x80000001, code, dummy, dummy, dummy);
311
312	for (i = 0; ; i += 2) {
313		if ((rev = cpurevs[i]) == OINK) {
314#ifdef ERRATA_DEBUG
315			printf("ERRATA: this CPU ok\n");
316#endif
317			return;
318		}
319		if (cpurevs[i + 1] == code) {
320#ifdef ERRATA_DEBUG
321			printf("ERRATA: this CPU has errata\n");
322#endif
323			break;
324		}
325	}
326
327	ex = errata + nitems(errata);
328
329	/* Reset e_reporteds (for multiple CPUs) */
330	for (e = errata; e < ex; e++)
331		e->e_reported = 0;
332
333	for (e = errata; e < ex; e++) {
334		if (e->e_reported)
335			continue;
336		if (e->e_set != NULL) {
337			for (j = 0; e->e_set[j] != OINK; j++)
338				if (e->e_set[j] == rev)
339					break;
340			if (e->e_set[j] == OINK)
341				continue;
342		}
343
344#ifdef ERRATA_DEBUG
345		printf("%s: testing for erratum %d\n",
346		    ci->ci_dev->dv_xname, e->e_num);
347#endif
348
349		/*
350		 * If we have an action routine, call it, otherwise
351		 * the default is that this erratum is present.
352		 */
353		rc = (e->e_act == NULL) ? 1 : (*e->e_act)(ci, e);
354
355		if (rc == 0)			/* not found */
356			continue;
357		if (rc == 1)
358			found++;
359		if (rc == 2)
360			corrected++;
361
362		e->e_reported = rc;
363
364#ifdef ERRATA_DEBUG
365		printf("%s: erratum %d present%s\n",
366		    ci->ci_dev->dv_xname, e->e_num,
367		    (rc == 2) ? " and patched" : "");
368#endif
369	}
370
371#define ERRATA_VERBOSE
372#ifdef ERRATA_VERBOSE
373	if (corrected) {
374		int first = 1;
375
376		/* Print out found and corrected */
377		if (!printed) {
378			printf("%s: AMD %s", ci->ci_dev->dv_xname,
379			    (corrected == 1) ? "erratum" : "errata");
380		}
381		for (e = errata; e < ex; e++) {
382			if (e->e_reported == 2) {
383				if (!printed) {
384					if (! first)
385						printf(",");
386					printf(" %d", e->e_num);
387				}
388				first = 0;
389			}
390		}
391		if (!printed)
392			printf(" detected and fixed\n");
393	}
394#endif
395
396	if (found) {
397		int first = 1;
398
399		/* Print out found but not corrected */
400		if (!printed) {
401			printf("%s: AMD %s", ci->ci_dev->dv_xname,
402			    (found == 1) ? "erratum" : "errata");
403		}
404		for (e = errata; e < ex; e++) {
405			if (e->e_reported == 1) {
406				if (!printed) {
407					if (! first)
408						printf(",");
409					printf(" %d", e->e_num);
410				}
411				first = 0;
412			}
413		}
414		if (!printed)
415			printf(" present, BIOS upgrade may be required\n");
416	}
417
418	/* Print only one time for the first CPU */
419	printed = 1;
420}
421