1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/param.h>
30#include <sys/kernel.h>
31#include <sys/systm.h>
32#include <sys/malloc.h>
33
34#include <vm/vm.h>
35#include <vm/pmap.h>
36
37#include <dev/pci/pcireg.h>
38
39#include <machine/vmparam.h>
40#include <contrib/dev/acpica/include/acpi.h>
41
42#include "io/iommu.h"
43
44/*
45 * Documented in the "Intel Virtualization Technology for Directed I/O",
46 * Architecture Spec, September 2008.
47 */
48
/*
 * Extract bit 0 of a DRHD structure's Flags field.  vtd_device_scope()
 * treats a non-zero result as "this unit covers every PCI device".
 */
#define VTD_DRHD_INCLUDE_PCI_ALL(Flags)  (((Flags) >> 0) & 0x1)
50
/*
 * Section 10.4 "Register Descriptions"
 *
 * Overlay for the leading registers of one remapping unit.  Every member is
 * volatile because the structure is accessed through a pointer into the
 * unit's register block.  The byte offset implied by the member order is
 * noted next to each field.
 */
struct vtdmap {
	volatile uint32_t	version;	/* 0x00: not referenced in this file */
	volatile uint32_t	res0;		/* 0x04: not referenced in this file */
	volatile uint64_t	cap;		/* 0x08: decoded with VTD_CAP_*() */
	volatile uint64_t	ext_cap;	/* 0x10: decoded with VTD_ECAP_*() */
	volatile uint32_t	gcr;		/* 0x18: written with VTD_GCR_* */
	volatile uint32_t	gsr;		/* 0x1c: polled for VTD_GSR_* */
	volatile uint64_t	rta;		/* 0x20: set to vtophys(root_table) */
	volatile uint64_t	ccr;		/* 0x28: written with VTD_CCR_* */
};
62
/*
 * Field extractors for the 'cap' register.
 *  SAGAW: bits 12:8, bitmask consumed by vtd_create_domain() to pick the
 *         number of page table levels
 *  ND:    bits 2:0, encoded domain count decoded by vtd_max_domains()
 *  CM:    bit 7, vtd_init() panics when it is set
 *  SPS:   bits 37:34, super page sizes (only read under '#ifdef notyet')
 *  RWBF:  bit 4, when set vtd_wbflush() issues a write-buffer flush
 */
#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

/*
 * Field extractors for the 'ext_cap' register.
 *  DI:        bit 2, when set vtd_add_device() puts VTD_CTX_TT_ALL in the
 *             context entry
 *  COHERENCY: bit 0, when clear vtd_wbflush() flushes the CPU caches
 *  IRO:       bits 17:8, multiplied by 16 to locate the IOTLB registers
 */
#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

/* Command bits written to 'gcr' */
#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

/* Status bits polled in 'gsr'; each sits at the same position as its command */
#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

/* Bits of 'ccr' used by vtd_ctx_global_invalidate() */
#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

/*
 * Bits of the IOTLB invalidate register.  Only IVT, IIRG_GLOBAL and the two
 * drain bits are used in this file.
 */
#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

/* Bits in the first 64-bit word of a root entry and of a context entry */
#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

/* Page table entry bits; VTD_PTE_ADDR_M keeps address bits 51:12 */
#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

/*
 * Index of the first of the two 64-bit words that make up the context entry
 * for the low 8 bits of 'rid' within a per-bus context table.
 */
#define VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)
102
/*
 * Per-domain translation state.  Instances are allocated by
 * vtd_create_domain(), linked on 'domhead' and released by
 * vtd_destroy_domain().
 */
struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;	/* linkage on 'domhead' */
};
112
/* Every domain created by vtd_create_domain() and not yet destroyed. */
static SLIST_HEAD(, domain) domhead;

/* Upper bound on the number of remapping units tracked below. */
#define	DRHD_MAX_UNITS	16
/*
 * DRHD structures recorded by vtd_init() while walking the DMAR table.
 * The entries are not filled in when the units come from tunables.
 */
static ACPI_DMAR_HARDWARE_UNIT	*drhds[DRHD_MAX_UNITS];
/* Number of valid entries in vtdmaps[]. */
static int			drhd_num;
/* Direct-map pointers to the register block of each unit. */
static struct vtdmap		*vtdmaps[DRHD_MAX_UNITS];
/* Smallest vtd_max_domains() value across all units. */
static int			max_domains;
/* Not referenced by any code in this file. */
typedef int			(*drhd_ident_func_t)(void);

/*
 * One page-sized root table shared by all units; vtd_init() fills slot
 * 'bus * 2' with the address of ctx_tables[bus].
 */
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
/* One page-sized context table per PCI bus number. */
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

/* malloc(9) type used for domains and page table pages. */
static MALLOC_DEFINE(M_VTD, "vtd", "vtd");
126
127static int
128vtd_max_domains(struct vtdmap *vtdmap)
129{
130	int nd;
131
132	nd = VTD_CAP_ND(vtdmap->cap);
133
134	switch (nd) {
135	case 0:
136		return (16);
137	case 1:
138		return (64);
139	case 2:
140		return (256);
141	case 3:
142		return (1024);
143	case 4:
144		return (4 * 1024);
145	case 5:
146		return (16 * 1024);
147	case 6:
148		return (64 * 1024);
149	default:
150		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
151	}
152}
153
154static u_int
155domain_id(void)
156{
157	u_int id;
158	struct domain *dom;
159
160	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
161	for (id = 1; id < max_domains; id++) {
162		SLIST_FOREACH(dom, &domhead, next) {
163			if (dom->id == id)
164				break;
165		}
166		if (dom == NULL)
167			break;		/* found it */
168	}
169
170	if (id >= max_domains)
171		panic("domain ids exhausted");
172
173	return (id);
174}
175
176static struct vtdmap *
177vtd_device_scope(uint16_t rid)
178{
179	int i, remaining, pathremaining;
180	char *end, *pathend;
181	struct vtdmap *vtdmap;
182	ACPI_DMAR_HARDWARE_UNIT *drhd;
183	ACPI_DMAR_DEVICE_SCOPE *device_scope;
184	ACPI_DMAR_PCI_PATH *path;
185
186	for (i = 0; i < drhd_num; i++) {
187		drhd = drhds[i];
188
189		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
190			/*
191			 * From Intel VT-d arch spec, version 3.0:
192			 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
193			 * for a Segment, it must be enumerated by BIOS after all other
194			 * DRHD structures for the same Segment.
195			 */
196			vtdmap = vtdmaps[i];
197			return(vtdmap);
198		}
199
200		end = (char *)drhd + drhd->Header.Length;
201		remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
202		while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
203			device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
204			remaining -= device_scope->Length;
205
206			switch (device_scope->EntryType){
207				/* 0x01 and 0x02 are PCI device entries */
208				case 0x01:
209				case 0x02:
210					break;
211				default:
212					continue;
213			}
214
215			if (PCI_RID2BUS(rid) != device_scope->Bus)
216				continue;
217
218			pathend = (char *)device_scope + device_scope->Length;
219			pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
220			while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
221				path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
222				pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);
223
224				if (PCI_RID2SLOT(rid) != path->Device)
225					continue;
226				if (PCI_RID2FUNC(rid) != path->Function)
227					continue;
228
229				vtdmap = vtdmaps[i];
230				return (vtdmap);
231			}
232		}
233	}
234
235	/* No matching scope */
236	return (NULL);
237}
238
239static void
240vtd_wbflush(struct vtdmap *vtdmap)
241{
242
243	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
244		pmap_invalidate_cache();
245
246	if (VTD_CAP_RWBF(vtdmap->cap)) {
247		vtdmap->gcr = VTD_GCR_WBF;
248		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
249			;
250	}
251}
252
253static void
254vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
255{
256
257	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
258	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
259		;
260}
261
262static void
263vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
264{
265	int offset;
266	volatile uint64_t *iotlb_reg, val;
267
268	vtd_wbflush(vtdmap);
269
270	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
271	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
272
273	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
274		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
275
276	while (1) {
277		val = *iotlb_reg;
278		if ((val & VTD_IIR_IVT) == 0)
279			break;
280	}
281}
282
283static void
284vtd_translation_enable(struct vtdmap *vtdmap)
285{
286
287	vtdmap->gcr = VTD_GCR_TE;
288	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
289		;
290}
291
292static void
293vtd_translation_disable(struct vtdmap *vtdmap)
294{
295
296	vtdmap->gcr = 0;
297	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
298		;
299}
300
301static int
302vtd_init(void)
303{
304	int i, units, remaining, tmp;
305	struct vtdmap *vtdmap;
306	vm_paddr_t ctx_paddr;
307	char *end, envname[32];
308	unsigned long mapaddr;
309	ACPI_STATUS status;
310	ACPI_TABLE_DMAR *dmar;
311	ACPI_DMAR_HEADER *hdr;
312	ACPI_DMAR_HARDWARE_UNIT *drhd;
313
314	/*
315	 * Allow the user to override the ACPI DMAR table by specifying the
316	 * physical address of each remapping unit.
317	 *
318	 * The following example specifies two remapping units at
319	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
320	 * set vtd.regmap.0.addr=0xfed90000
321	 * set vtd.regmap.1.addr=0xfeda0000
322	 */
323	for (units = 0; units < DRHD_MAX_UNITS; units++) {
324		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
325		if (getenv_ulong(envname, &mapaddr) == 0)
326			break;
327		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
328	}
329
330	if (units > 0)
331		goto skip_dmar;
332
333	/* Search for DMAR table. */
334	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
335	if (ACPI_FAILURE(status))
336		return (ENXIO);
337
338	end = (char *)dmar + dmar->Header.Length;
339	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
340	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
341		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
342		if (hdr->Length > remaining)
343			break;
344		/*
345		 * From Intel VT-d arch spec, version 1.3:
346		 * BIOS implementations must report mapping structures
347		 * in numerical order, i.e. All remapping structures of
348		 * type 0 (DRHD) enumerated before remapping structures of
349		 * type 1 (RMRR) and so forth.
350		 */
351		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
352			break;
353
354		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
355		drhds[units] = drhd;
356		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
357		if (++units >= DRHD_MAX_UNITS)
358			break;
359		remaining -= hdr->Length;
360	}
361
362	if (units <= 0)
363		return (ENXIO);
364
365skip_dmar:
366	drhd_num = units;
367
368	max_domains = 64 * 1024; /* maximum valid value */
369	for (i = 0; i < drhd_num; i++){
370		vtdmap = vtdmaps[i];
371
372		if (VTD_CAP_CM(vtdmap->cap) != 0)
373			panic("vtd_init: invalid caching mode");
374
375		/* take most compatible (minimum) value */
376		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
377			max_domains = tmp;
378	}
379
380	/*
381	 * Set up the root-table to point to the context-entry tables
382	 */
383	for (i = 0; i < 256; i++) {
384		ctx_paddr = vtophys(ctx_tables[i]);
385		if (ctx_paddr & PAGE_MASK)
386			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
387
388		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
389	}
390
391	return (0);
392}
393
/*
 * 'cleanup' entry of iommu_ops_intel.  Intentionally empty: the body
 * performs no work.
 */
static void
vtd_cleanup(void)
{
}
398
399static void
400vtd_enable(void)
401{
402	int i;
403	struct vtdmap *vtdmap;
404
405	for (i = 0; i < drhd_num; i++) {
406		vtdmap = vtdmaps[i];
407		vtd_wbflush(vtdmap);
408
409		/* Update the root table address */
410		vtdmap->rta = vtophys(root_table);
411		vtdmap->gcr = VTD_GCR_SRTP;
412		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
413			;
414
415		vtd_ctx_global_invalidate(vtdmap);
416		vtd_iotlb_global_invalidate(vtdmap);
417
418		vtd_translation_enable(vtdmap);
419	}
420}
421
422static void
423vtd_disable(void)
424{
425	int i;
426	struct vtdmap *vtdmap;
427
428	for (i = 0; i < drhd_num; i++) {
429		vtdmap = vtdmaps[i];
430		vtd_translation_disable(vtdmap);
431	}
432}
433
434static void
435vtd_add_device(void *arg, uint16_t rid)
436{
437	int idx;
438	uint64_t *ctxp;
439	struct domain *dom = arg;
440	vm_paddr_t pt_paddr;
441	struct vtdmap *vtdmap;
442	uint8_t bus;
443
444	KASSERT(dom != NULL, ("domain is NULL"));
445
446	bus = PCI_RID2BUS(rid);
447	ctxp = ctx_tables[bus];
448	pt_paddr = vtophys(dom->ptp);
449	idx = VTD_RID2IDX(rid);
450
451	if (ctxp[idx] & VTD_CTX_PRESENT) {
452		panic("vtd_add_device: device %x is already owned by "
453		      "domain %d", rid,
454		      (uint16_t)(ctxp[idx + 1] >> 8));
455	}
456
457	if ((vtdmap = vtd_device_scope(rid)) == NULL)
458		panic("vtd_add_device: device %x is not in scope for "
459		      "any DMA remapping unit", rid);
460
461	/*
462	 * Order is important. The 'present' bit is set only after all fields
463	 * of the context pointer are initialized.
464	 */
465	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);
466
467	if (VTD_ECAP_DI(vtdmap->ext_cap))
468		ctxp[idx] = VTD_CTX_TT_ALL;
469	else
470		ctxp[idx] = 0;
471
472	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
473
474	/*
475	 * 'Not Present' entries are not cached in either the Context Cache
476	 * or in the IOTLB, so there is no need to invalidate either of them.
477	 */
478}
479
480static void
481vtd_remove_device(void *arg, uint16_t rid)
482{
483	int i, idx;
484	uint64_t *ctxp;
485	struct vtdmap *vtdmap;
486	uint8_t bus;
487
488	bus = PCI_RID2BUS(rid);
489	ctxp = ctx_tables[bus];
490	idx = VTD_RID2IDX(rid);
491
492	/*
493	 * Order is important. The 'present' bit is must be cleared first.
494	 */
495	ctxp[idx] = 0;
496	ctxp[idx + 1] = 0;
497
498	/*
499	 * Invalidate the Context Cache and the IOTLB.
500	 *
501	 * XXX use device-selective invalidation for Context Cache
502	 * XXX use domain-selective invalidation for IOTLB
503	 */
504	for (i = 0; i < drhd_num; i++) {
505		vtdmap = vtdmaps[i];
506		vtd_ctx_global_invalidate(vtdmap);
507		vtd_iotlb_global_invalidate(vtdmap);
508	}
509}
510
/* Values for the 'remove' argument of vtd_update_mapping(). */
#define	CREATE_MAPPING	0
#define	REMOVE_MAPPING	1
513
514static uint64_t
515vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
516		   int remove)
517{
518	struct domain *dom;
519	int i, spshift, ptpshift, ptpindex, nlevels;
520	uint64_t spsize, *ptp;
521
522	dom = arg;
523	ptpindex = 0;
524	ptpshift = 0;
525
526	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
527	    gpa, len));
528	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
529	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));
530
531	if (gpa & PAGE_MASK)
532		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
533
534	if (hpa & PAGE_MASK)
535		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
536
537	if (len & PAGE_MASK)
538		panic("vtd_create_mapping: unaligned len 0x%0lx", len);
539
540	/*
541	 * Compute the size of the mapping that we can accommodate.
542	 *
543	 * This is based on three factors:
544	 * - supported super page size
545	 * - alignment of the region starting at 'gpa' and 'hpa'
546	 * - length of the region 'len'
547	 */
548	spshift = 48;
549	for (i = 3; i >= 0; i--) {
550		spsize = 1UL << spshift;
551		if ((dom->spsmask & (1 << i)) != 0 &&
552		    (gpa & (spsize - 1)) == 0 &&
553		    (hpa & (spsize - 1)) == 0 &&
554		    (len >= spsize)) {
555			break;
556		}
557		spshift -= 9;
558	}
559
560	ptp = dom->ptp;
561	nlevels = dom->pt_levels;
562	while (--nlevels >= 0) {
563		ptpshift = 12 + nlevels * 9;
564		ptpindex = (gpa >> ptpshift) & 0x1FF;
565
566		/* We have reached the leaf mapping */
567		if (spshift >= ptpshift) {
568			break;
569		}
570
571		/*
572		 * We are working on a non-leaf page table page.
573		 *
574		 * Create a downstream page table page if necessary and point
575		 * to it from the current page table.
576		 */
577		if (ptp[ptpindex] == 0) {
578			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
579			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
580		}
581
582		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
583	}
584
585	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
586		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);
587
588	/*
589	 * Update the 'gpa' -> 'hpa' mapping
590	 */
591	if (remove) {
592		ptp[ptpindex] = 0;
593	} else {
594		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
595
596		if (nlevels > 0)
597			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
598	}
599
600	return (1UL << ptpshift);
601}
602
603static uint64_t
604vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
605{
606
607	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
608}
609
610static uint64_t
611vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
612{
613
614	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
615}
616
617static void
618vtd_invalidate_tlb(void *dom)
619{
620	int i;
621	struct vtdmap *vtdmap;
622
623	/*
624	 * Invalidate the IOTLB.
625	 * XXX use domain-selective invalidation for IOTLB
626	 */
627	for (i = 0; i < drhd_num; i++) {
628		vtdmap = vtdmaps[i];
629		vtd_iotlb_global_invalidate(vtdmap);
630	}
631}
632
633static void *
634vtd_create_domain(vm_paddr_t maxaddr)
635{
636	struct domain *dom;
637	vm_paddr_t addr;
638	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
639	struct vtdmap *vtdmap;
640
641	if (drhd_num <= 0)
642		panic("vtd_create_domain: no dma remapping hardware available");
643
644	/*
645	 * Calculate AGAW.
646	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
647	 */
648	addr = 0;
649	for (gaw = 0; addr < maxaddr; gaw++)
650		addr = 1ULL << gaw;
651
652	res = (gaw - 12) % 9;
653	if (res == 0)
654		agaw = gaw;
655	else
656		agaw = gaw + 9 - res;
657
658	if (agaw > 64)
659		agaw = 64;
660
661	/*
662	 * Select the smallest Supported AGAW and the corresponding number
663	 * of page table levels.
664	 */
665	pt_levels = 2;
666	sagaw = 30;
667	addrwidth = 0;
668
669	tmp = ~0;
670	for (i = 0; i < drhd_num; i++) {
671		vtdmap = vtdmaps[i];
672		/* take most compatible value */
673		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
674	}
675
676	for (i = 0; i < 5; i++) {
677		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
678			break;
679		pt_levels++;
680		addrwidth++;
681		sagaw += 9;
682		if (sagaw > 64)
683			sagaw = 64;
684	}
685
686	if (i >= 5) {
687		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
688		      tmp, agaw);
689	}
690
691	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
692	dom->pt_levels = pt_levels;
693	dom->addrwidth = addrwidth;
694	dom->id = domain_id();
695	dom->maxaddr = maxaddr;
696	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
697	if ((uintptr_t)dom->ptp & PAGE_MASK)
698		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
699
700#ifdef notyet
701	/*
702	 * XXX superpage mappings for the iommu do not work correctly.
703	 *
704	 * By default all physical memory is mapped into the host_domain.
705	 * When a VM is allocated wired memory the pages belonging to it
706	 * are removed from the host_domain and added to the vm's domain.
707	 *
708	 * If the page being removed was mapped using a superpage mapping
709	 * in the host_domain then we need to demote the mapping before
710	 * removing the page.
711	 *
712	 * There is not any code to deal with the demotion at the moment
713	 * so we disable superpage mappings altogether.
714	 */
715	dom->spsmask = ~0;
716	for (i = 0; i < drhd_num; i++) {
717		vtdmap = vtdmaps[i];
718		/* take most compatible value */
719		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
720	}
721#endif
722
723	SLIST_INSERT_HEAD(&domhead, dom, next);
724
725	return (dom);
726}
727
728static void
729vtd_free_ptp(uint64_t *ptp, int level)
730{
731	int i;
732	uint64_t *nlp;
733
734	if (level > 1) {
735		for (i = 0; i < 512; i++) {
736			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
737				continue;
738			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
739				continue;
740			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
741			vtd_free_ptp(nlp, level - 1);
742		}
743	}
744
745	bzero(ptp, PAGE_SIZE);
746	free(ptp, M_VTD);
747}
748
749static void
750vtd_destroy_domain(void *arg)
751{
752	struct domain *dom;
753
754	dom = arg;
755
756	SLIST_REMOVE(&domhead, dom, domain, next);
757	vtd_free_ptp(dom->ptp, dom->pt_levels);
758	free(dom, M_VTD);
759}
760
/*
 * Operations table that exposes this file's static functions through the
 * 'struct iommu_ops' interface.  It is the only symbol in this file with
 * external linkage.
 */
const struct iommu_ops iommu_ops_intel = {
	.init = vtd_init,
	.cleanup = vtd_cleanup,
	.enable = vtd_enable,
	.disable = vtd_disable,
	.create_domain = vtd_create_domain,
	.destroy_domain = vtd_destroy_domain,
	.create_mapping = vtd_create_mapping,
	.remove_mapping = vtd_remove_mapping,
	.add_device = vtd_add_device,
	.remove_device = vtd_remove_device,
	.invalidate_tlb = vtd_invalidate_tlb,
};
774