1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
 * Author: Corvin Köhne <c.koehne@beckhoff.com>
6 */
7
8#include <sys/types.h>
9#include <sys/mman.h>
10#include <sys/sysctl.h>
11
12#include <dev/pci/pcireg.h>
13
14#include <err.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <string.h>
18#include <unistd.h>
19
20#include "amd64/e820.h"
21#include "pci_gvt-d-opregion.h"
22#include "pci_passthru.h"
23
/* Size units used for address and length arithmetic in this file. */
#define KB (1024UL)
#define MB (1024 * KB)
#define GB (1024 * MB)

/* Provide a default memory device path if no header defined _PATH_MEM. */
#ifndef _PATH_MEM
#define _PATH_MEM "/dev/mem"
#endif

/* PCI vendor ID that gvt_d_probe() compares the host device against. */
#define PCI_VENDOR_INTEL 0x8086

/* Config space offsets that this file reads from the host and emulates. */
#define PCIR_BDSM 0x5C	   /* Base of Data Stolen Memory register */
#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */

#define PCIM_BDSM_GSM_ALIGNMENT \
	0x00100000 /* Graphics Stolen Memory is 1 MB aligned */

/* Indices passed to passthru_get_mmio() to select a mapping of the device. */
#define GVT_D_MAP_GSM 0
#define GVT_D_MAP_OPREGION 1
42
43static int
44gvt_d_probe(struct pci_devinst *const pi)
45{
46	struct passthru_softc *sc;
47	uint16_t vendor;
48	uint8_t class;
49
50	sc = pi->pi_arg;
51
52	vendor = pci_host_read_config(passthru_get_sel(sc), PCIR_VENDOR, 0x02);
53	if (vendor != PCI_VENDOR_INTEL)
54		return (ENXIO);
55
56	class = pci_host_read_config(passthru_get_sel(sc), PCIR_CLASS, 0x01);
57	if (class != PCIC_DISPLAY)
58		return (ENXIO);
59
60	return (0);
61}
62
63static vm_paddr_t
64gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
65    const vm_paddr_t alignment, const enum e820_memory_type type)
66{
67	vm_paddr_t address;
68
69	/* Try to reuse host address. */
70	address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type,
71	    E820_ALLOCATE_SPECIFIC);
72	if (address != 0) {
73		return (address);
74	}
75
76	/*
77	 * We're not able to reuse the host address. Fall back to the highest usable
78	 * address below 4 GB.
79	 */
80	return (
81	    e820_alloc(4 * GB, length, alignment, type, E820_ALLOCATE_HIGHEST));
82}
83
84/*
 * Note that the graphics stolen memory is somewhat confusing. On the one hand,
 * the Intel Open Source HD Graphics Programmers' Reference Manual states that
 * it's only GPU accessible. As the CPU can't access the area, the guest
 * shouldn't need it. On the other hand, the Intel GOP driver refuses to work
 * properly if it's not set to a proper address.
90 *
91 * Intel itself maps it into the guest by EPT [1]. At the moment, we're not
92 * aware of any situation where this EPT mapping is required, so we don't do it
93 * yet.
94 *
95 * Intel also states that the Windows driver for Tiger Lake reads the address of
96 * the graphics stolen memory [2]. As the GVT-d code doesn't support Tiger Lake
97 * in its first implementation, we can't check how it behaves. We should keep an
98 * eye on it.
99 *
100 * [1]
101 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L655-L657
102 * [2]
103 * https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L626-L629
104 */
static int
gvt_d_setup_gsm(struct pci_devinst *const pi)
{
	struct passthru_softc *const softc = pi->pi_arg;
	struct passthru_mmio_mapping *stolen;
	size_t oldlen;
	uint32_t host_bdsm, low_bits;

	/*
	 * Reserves a guest range for the Graphics Stolen Memory and emulates
	 * the BDSM register so that it reports the guest address. Returns the
	 * result of set_pcir_handler() on success and -1 on any earlier error.
	 */
	stolen = passthru_get_mmio(softc, GVT_D_MAP_GSM);
	if (stolen == NULL) {
		warnx("%s: Unable to access gsm", __func__);
		return (-1);
	}

	/* Host base and size of the stolen memory are exported via sysctl. */
	oldlen = sizeof(stolen->hpa);
	if (sysctlbyname("hw.intel_graphics_stolen_base", &stolen->hpa,
	    &oldlen, NULL, 0) != 0) {
		warn("%s: Unable to get graphics stolen memory base",
		    __func__);
		return (-1);
	}

	oldlen = sizeof(stolen->len);
	if (sysctlbyname("hw.intel_graphics_stolen_size", &stolen->len,
	    &oldlen, NULL, 0) != 0) {
		warn("%s: Unable to get graphics stolen memory length",
		    __func__);
		return (-1);
	}

	/* The region is never mapped into a virtual address space here. */
	stolen->hva = NULL;
	stolen->gva = NULL;

	stolen->gpa = gvt_d_alloc_mmio_memory(stolen->hpa, stolen->len,
	    PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
	if (stolen->gpa == 0) {
		warnx(
		    "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, stolen->hpa, stolen->len);
		e820_dump_table();
		return (-1);
	}

	if (stolen->gpa != stolen->hpa) {
		/*
		 * According to the ACRN sources, graphics drivers for newer
		 * Intel platforms such as Tiger Lake read the Graphics Stolen
		 * Memory address from an MMIO register. There are three ways
		 * to deal with that:
		 *    1. Patch the value in the MMIO register
		 *       Without documentation on how the GPU uses this
		 *       register, this could have unintended side effects, so
		 *       don't do it.
		 *    2. Trap the MMIO register
		 *       A single register can't be trapped, only a whole page.
		 *       Trapping many MMIO registers could noticeably degrade
		 *       performance. This has to be tested.
		 *    3. Use a 1:1 host to guest mapping
		 *       This may not always be possible. As far as we know, no
		 *       supported platform requires a 1:1 mapping, so only a
		 *       warning is logged.
		 */
		warnx(
		    "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
	}

	/* Keep the host's bits below the 1 MB alignment, swap in the GPA. */
	host_bdsm = pci_host_read_config(passthru_get_sel(softc), PCIR_BDSM, 4);
	low_bits = host_bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1);
	pci_set_cfgdata32(pi, PCIR_BDSM, stolen->gpa | low_bits);

	return (set_pcir_handler(softc, PCIR_BDSM, 4, passthru_cfgread_emulate,
	    passthru_cfgwrite_emulate));
}
179
180static int
181gvt_d_setup_opregion(struct pci_devinst *const pi)
182{
183	struct passthru_softc *sc;
184	struct passthru_mmio_mapping *opregion;
185	struct igd_opregion_header *header;
186	uint64_t asls;
187	int memfd;
188
189	sc = pi->pi_arg;
190
191	memfd = open(_PATH_MEM, O_RDONLY, 0);
192	if (memfd < 0) {
193		warn("%s: Failed to open %s", __func__, _PATH_MEM);
194		return (-1);
195	}
196
197	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
198	if (opregion == NULL) {
199		warnx("%s: Unable to access opregion", __func__);
200		close(memfd);
201		return (-1);
202	}
203
204	asls = pci_host_read_config(passthru_get_sel(sc), PCIR_ASLS_CTL, 4);
205
206	header = mmap(NULL, sizeof(*header), PROT_READ, MAP_SHARED, memfd,
207	    asls);
208	if (header == MAP_FAILED) {
209		warn("%s: Unable to map OpRegion header", __func__);
210		close(memfd);
211		return (-1);
212	}
213	if (memcmp(header->sign, IGD_OPREGION_HEADER_SIGN,
214	    sizeof(header->sign)) != 0) {
215		warnx("%s: Invalid OpRegion signature", __func__);
216		munmap(header, sizeof(*header));
217		close(memfd);
218		return (-1);
219	}
220
221	opregion->hpa = asls;
222	opregion->len = header->size * KB;
223	munmap(header, sizeof(*header));
224
225	if (opregion->len != sizeof(struct igd_opregion)) {
226		warnx("%s: Invalid OpRegion size of 0x%lx", __func__,
227		    opregion->len);
228		close(memfd);
229		return (-1);
230	}
231
232	opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
233	    opregion->hpa);
234	if (opregion->hva == MAP_FAILED) {
235		warn("%s: Unable to map host OpRegion", __func__);
236		close(memfd);
237		return (-1);
238	}
239	close(memfd);
240
241	opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
242	    E820_ALIGNMENT_NONE, E820_TYPE_NVS);
243	if (opregion->gpa == 0) {
244		warnx(
245		    "%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
246		    __func__, opregion->hpa, opregion->len);
247		e820_dump_table();
248		return (-1);
249	}
250	opregion->gva = vm_map_gpa(pi->pi_vmctx, opregion->gpa, opregion->len);
251	if (opregion->gva == NULL) {
252		warnx("%s: Unable to map guest OpRegion", __func__);
253		return (-1);
254	}
255	if (opregion->gpa != opregion->hpa) {
256		/*
257		 * A 1:1 host to guest mapping is not required but this could
258		 * change in the future.
259		 */
260		warnx(
261		    "Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
262	}
263
264	memcpy(opregion->gva, opregion->hva, opregion->len);
265
266	pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
267
268	return (set_pcir_handler(sc, PCIR_ASLS_CTL, 4, passthru_cfgread_emulate,
269	    passthru_cfgwrite_emulate));
270}
271
272static int
273gvt_d_init(struct pci_devinst *const pi, nvlist_t *const nvl __unused)
274{
275	int error;
276
277	if ((error = gvt_d_setup_gsm(pi)) != 0) {
278		warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
279		goto done;
280	}
281
282	if ((error = gvt_d_setup_opregion(pi)) != 0) {
283		warnx("%s: Unable to setup OpRegion", __func__);
284		goto done;
285	}
286
287done:
288	return (error);
289}
290
291static void
292gvt_d_deinit(struct pci_devinst *const pi)
293{
294	struct passthru_softc *sc;
295	struct passthru_mmio_mapping *opregion;
296
297	sc = pi->pi_arg;
298
299	opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
300
301	/* HVA is only set, if it's initialized */
302	if (opregion->hva)
303		munmap((void *)opregion->hva, opregion->len);
304}
305
/*
 * Callback table for Intel GVT-d passthrough: probe, init and deinit hooks
 * defined above.
 * NOTE(review): PASSTHRU_DEV_SET presumably registers this table with the
 * generic passthru code; its definition is not visible in this file.
 */
static struct passthru_dev gvt_d_dev = {
	.probe = gvt_d_probe,
	.init = gvt_d_init,
	.deinit = gvt_d_deinit,
};
PASSTHRU_DEV_SET(gvt_d_dev);
312