1314638Savg/*-
2314638Savg * Copyright (c) 2017 Andriy Gapon
3314638Savg * All rights reserved.
4314638Savg *
5314638Savg * Redistribution and use in source and binary forms, with or without
6314638Savg * modification, are permitted provided that the following conditions
7314638Savg * are met:
8314638Savg * 1. Redistributions of source code must retain the above copyright
9314638Savg *    notice, this list of conditions and the following disclaimer.
10314638Savg * 2. Redistributions in binary form must reproduce the above copyright
11314638Savg *    notice, this list of conditions and the following disclaimer in the
12314638Savg *    documentation and/or other materials provided with the distribution.
13314638Savg *
14314638Savg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15314638Savg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16314638Savg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17314638Savg * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18314638Savg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19314638Savg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20314638Savg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21314638Savg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22314638Savg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23314638Savg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24314638Savg * SUCH DAMAGE.
25314638Savg *
26314638Savg * $FreeBSD: stable/11/sys/dev/amd_ecc_inject/ecc_inject.c 314638 2017-03-03 22:51:04Z avg $
27314638Savg */
28314638Savg
29314638Savg#include <sys/param.h>
30314638Savg#include <sys/systm.h>
31314638Savg#include <sys/bus.h>
32314638Savg#include <sys/kernel.h>
33314638Savg#include <sys/conf.h>
34314638Savg#include <sys/malloc.h>
35314638Savg#include <sys/module.h>
36314638Savg#include <sys/sysctl.h>
37314638Savg#include <sys/types.h>
38314638Savg
39314638Savg#include <dev/pci/pcivar.h>
40314638Savg
41314638Savg#include <vm/vm.h>
42314638Savg#include <vm/vm_extern.h>
43314638Savg#include <vm/vm_kern.h>
44314638Savg
45314638Savg#include <machine/cputypes.h>
46314638Savg#include <machine/md_var.h>
47314638Savg
48314638Savg
/*
 * Northbridge PCI configuration register offsets and bit fields used for
 * DRAM ECC error injection.  Field definitions are indented under the
 * register they belong to.
 *
 * See BKDG for AMD Family 15h Models 00h-0Fh Processors
 * (publication 42301 Rev 3.08 - March 12, 2012):
 * - 2.13.3.1 DRAM Error Injection
 * - D18F3xB8 NB Array Address
 * - D18F3xBC NB Array Data Port
 * - D18F3xBC_x8 DRAM ECC
 *
 * Shifted fields are unsigned constants: DRAM_ECC_SEL sets bit 31, which
 * cannot be produced by left-shifting a signed int without overflow.
 */
#define	NB_MCA_CFG		0x44
#define		DRAM_ECC_EN	(1u << 22)
#define	NB_MCA_EXTCFG		0x180
#define		ECC_SYMB_SZ	(1u << 25)
#define	NB_ARRAY_ADDR		0xb8
#define		DRAM_ECC_SEL	(0x8u << 28)
#define		QUADRANT_SHIFT	1
#define		QUADRANT_MASK	0x3
#define	NB_ARRAY_PORT		0xbc
#define		INJ_WORD_SHIFT	20
#define		INJ_WORD_MASK	0x1ff
#define		DRAM_ERR_EN	(1u << 18)
#define		DRAM_WR_REQ	(1u << 17)
#define		DRAM_RD_REQ	(1u << 16)
#define		INJ_VECTOR_MASK	0xffff
72314638Savg
73314638Savgstatic void ecc_ei_inject(int);
74314638Savg
75314638Savgstatic device_t nbdev;
76314638Savgstatic int delay_ms = 0;
77314638Savgstatic int quadrant = 0;	/* 0 - 3 */
78314638Savgstatic int word_mask = 0x001;	/* 9 bits: 8 + 1 for ECC */
79314638Savgstatic int bit_mask = 0x0001;	/* 16 bits */
80314638Savg
81314638Savgstatic int
82314638Savgsysctl_int_with_max(SYSCTL_HANDLER_ARGS)
83314638Savg{
84314638Savg	u_int value;
85314638Savg	int error;
86314638Savg
87314638Savg	value = *(u_int *)arg1;
88314638Savg	error = sysctl_handle_int(oidp, &value, 0, req);
89314638Savg	if (error || req->newptr == NULL)
90314638Savg		return (error);
91314638Savg	if (value > arg2)
92314638Savg		return (EINVAL);
93314638Savg	*(u_int *)arg1 = value;
94314638Savg	return (0);
95314638Savg}
96314638Savg
97314638Savgstatic int
98314638Savgsysctl_nonzero_int_with_max(SYSCTL_HANDLER_ARGS)
99314638Savg{
100314638Savg	u_int value;
101314638Savg	int error;
102314638Savg
103314638Savg	value = *(u_int *)arg1;
104314638Savg	error = sysctl_int_with_max(oidp, &value, arg2, req);
105314638Savg	if (error || req->newptr == NULL)
106314638Savg		return (error);
107314638Savg	if (value == 0)
108314638Savg		return (EINVAL);
109314638Savg	*(u_int *)arg1 = value;
110314638Savg	return (0);
111314638Savg}
112314638Savg
113314638Savgstatic int
114314638Savgsysctl_proc_inject(SYSCTL_HANDLER_ARGS)
115314638Savg{
116314638Savg	int error;
117314638Savg	int i;
118314638Savg
119314638Savg	i = 0;
120314638Savg	error = sysctl_handle_int(oidp, &i, 0, req);
121314638Savg	if (error)
122314638Savg		return (error);
123314638Savg	if (i != 0)
124314638Savg		ecc_ei_inject(i);
125314638Savg	return (0);
126314638Savg}
127314638Savg
128314638Savgstatic SYSCTL_NODE(_hw, OID_AUTO, error_injection, CTLFLAG_RD, NULL,
129314638Savg    "Hardware error injection");
130314638Savgstatic SYSCTL_NODE(_hw_error_injection, OID_AUTO, dram_ecc, CTLFLAG_RD, NULL,
131314638Savg    "DRAM ECC error injection");
132314638SavgSYSCTL_UINT(_hw_error_injection_dram_ecc, OID_AUTO, delay,
133314638Savg    CTLTYPE_UINT | CTLFLAG_RW, &delay_ms, 0,
134314638Savg    "Delay in milliseconds between error injections");
135314638SavgSYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, quadrant,
136314638Savg    CTLTYPE_UINT | CTLFLAG_RW, &quadrant, QUADRANT_MASK,
137314638Savg    sysctl_int_with_max, "IU",
138314638Savg    "Index of 16-byte quadrant within 64-byte line where errors "
139314638Savg    "should be injected");
140314638SavgSYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, word_mask,
141314638Savg    CTLTYPE_UINT | CTLFLAG_RW, &word_mask, INJ_WORD_MASK,
142314638Savg    sysctl_nonzero_int_with_max, "IU",
143314638Savg    "9-bit mask of words where errors should be injected (8 data + 1 ECC)");
144314638SavgSYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, bit_mask,
145314638Savg    CTLTYPE_UINT | CTLFLAG_RW, &bit_mask, INJ_VECTOR_MASK,
146314638Savg    sysctl_nonzero_int_with_max, "IU",
147314638Savg    "16-bit mask of bits within each selected word where errors "
148314638Savg    "should be injected");
149314638SavgSYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, inject,
150314638Savg    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_proc_inject, "I",
151314638Savg    "Inject a number of errors according to configured parameters");
152314638Savg
153314638Savgstatic void
154314638Savgecc_ei_inject_one(void *arg, size_t size)
155314638Savg{
156314638Savg	volatile uint64_t *memory = arg;
157314638Savg	uint32_t val;
158314638Savg	int i;
159314638Savg
160314638Savg	val = DRAM_ECC_SEL | (quadrant << QUADRANT_SHIFT);
161314638Savg	pci_write_config(nbdev, NB_ARRAY_ADDR, val, 4);
162314638Savg
163314638Savg	val = (word_mask << INJ_WORD_SHIFT) | DRAM_WR_REQ | bit_mask;
164314638Savg	pci_write_config(nbdev, NB_ARRAY_PORT, val, 4);
165314638Savg
166314638Savg	for (i = 0; i < size / sizeof(uint64_t); i++) {
167314638Savg		memory[i] = 0;
168314638Savg		val = pci_read_config(nbdev, NB_ARRAY_PORT, 4);
169314638Savg		if ((val & DRAM_WR_REQ) == 0)
170314638Savg			break;
171314638Savg	}
172314638Savg	for (i = 0; i < size / sizeof(uint64_t); i++)
173314638Savg		memory[0] = memory[i];
174314638Savg}
175314638Savg
176314638Savgstatic void
177314638Savgecc_ei_inject(int count)
178314638Savg{
179314638Savg	vm_offset_t memory;
180314638Savg	int injected;
181314638Savg
182314638Savg	KASSERT((quadrant & ~QUADRANT_MASK) == 0,
183314638Savg	    ("quadrant value is outside of range: %u", quadrant));
184314638Savg	KASSERT(word_mask != 0 && (word_mask & ~INJ_WORD_MASK) == 0,
185314638Savg	    ("word mask value is outside of range: 0x%x", word_mask));
186314638Savg	KASSERT(bit_mask != 0 && (bit_mask & ~INJ_VECTOR_MASK) == 0,
187314638Savg	    ("bit mask value is outside of range: 0x%x", bit_mask));
188314638Savg
189314638Savg	memory = kmem_alloc_attr(kernel_arena, PAGE_SIZE, M_WAITOK, 0, ~0,
190314638Savg	    VM_MEMATTR_UNCACHEABLE);
191314638Savg
192314638Savg	for (injected = 0; injected < count; injected++) {
193314638Savg		ecc_ei_inject_one((void*)memory, PAGE_SIZE);
194314638Savg		if (delay_ms != 0 && injected != count - 1)
195314638Savg			pause_sbt("ecc_ei_inject", delay_ms * SBT_1MS, 0, 0);
196314638Savg	}
197314638Savg
198314638Savg	kmem_free(kernel_arena, memory, PAGE_SIZE);
199314638Savg}
200314638Savg
201314638Savgstatic int
202314638Savgecc_ei_load(void)
203314638Savg{
204314638Savg	uint32_t val;
205314638Savg
206314638Savg	if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) {
207314638Savg		printf("DRAM ECC error injection is not supported\n");
208314638Savg		return (ENXIO);
209314638Savg	}
210314638Savg	nbdev = pci_find_bsf(0, 24, 3);
211314638Savg	if (nbdev == NULL) {
212314638Savg		printf("Couldn't find NB PCI device\n");
213314638Savg		return (ENXIO);
214314638Savg	}
215314638Savg	val = pci_read_config(nbdev, NB_MCA_CFG, 4);
216314638Savg	if ((val & DRAM_ECC_EN) == 0) {
217314638Savg		printf("DRAM ECC is not supported or disabled\n");
218314638Savg		return (ENXIO);
219314638Savg	}
220314638Savg	printf("DRAM ECC error injection support loaded\n");
221314638Savg	return (0);
222314638Savg}
223314638Savg
224314638Savgstatic int
225314638Savgtsc_modevent(module_t mod __unused, int type, void *data __unused)
226314638Savg{
227314638Savg	int error;
228314638Savg
229314638Savg	error = 0;
230314638Savg	switch (type) {
231314638Savg	case MOD_LOAD:
232314638Savg		error = ecc_ei_load();
233314638Savg		break;
234314638Savg	case MOD_UNLOAD:
235314638Savg	case MOD_SHUTDOWN:
236314638Savg		break;
237314638Savg	default:
238314638Savg		return (EOPNOTSUPP);
239314638Savg	}
240314638Savg	return (0);
241314638Savg}
242314638Savg
/*
 * Register the module with tsc_modevent() as its event handler.
 * NOTE(review): the module and handler are named "tsc" although this file
 * implements DRAM ECC error injection -- looks like a leftover name; confirm
 * before renaming, since the module name is externally visible.
 */
DEV_MODULE(tsc, tsc_modevent, NULL);
244