/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/pci/pcireg.h>

#include <machine/vmparam.h>
#include <contrib/dev/acpica/include/acpi.h>

#include "io/iommu.h"

/*
 * Documented in the "Intel Virtualization Technology for Directed I/O",
 * Architecture Spec, September 2008.
 */

/* Section 10.4 "Register Descriptions" */
struct vtdmap {
	volatile uint32_t	version;
	volatile uint32_t	res0;
	volatile uint64_t	cap;
	volatile uint64_t	ext_cap;
	volatile uint32_t	gcr;
	volatile uint32_t	gsr;
	volatile uint64_t	rta;
	volatile uint64_t	ccr;
};

#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

#define	VTD_IIR_IVT		(1UL << 63)	/* invalidate IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

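/*
 * Each context entry is 128 bits wide (two uint64_t words), so the
 * device/function byte of a PCI requester ID is scaled by 2 to index
 * into a context-entry table.
 */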
#define VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)

struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;
};

static SLIST_HEAD(, domain) domhead;

#define	DRHD_MAX_UNITS	8
static int		drhd_num;
static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS];
static int		max_domains;
typedef int		(*drhd_ident_func_t)(void);

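/*
 * The root table contains one 128-bit entry per PCI bus; each entry points
 * at a statically allocated, page-aligned context-entry table for that bus.
 * vtd_init() verifies the alignment and fills in the root entries.
 */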
static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

static MALLOC_DEFINE(M_VTD, "vtd", "vtd");

static int
vtd_max_domains(struct vtdmap *vtdmap)
{
	int nd;

	nd = VTD_CAP_ND(vtdmap->cap);

	switch (nd) {
	case 0:
		return (16);
	case 1:
		return (64);
	case 2:
		return (256);
	case 3:
		return (1024);
	case 4:
		return (4 * 1024);
	case 5:
		return (16 * 1024);
	case 6:
		return (64 * 1024);
	default:
		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
	}
}

static u_int
domain_id(void)
{
	u_int id;
	struct domain *dom;

	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
	for (id = 1; id < max_domains; id++) {
		SLIST_FOREACH(dom, &domhead, next) {
			if (dom->id == id)
				break;
		}
		if (dom == NULL)
			break;		/* found it */
	}

	if (id >= max_domains)
		panic("domain ids exhausted");

	return (id);
}

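/*
 * Make in-memory translation structure updates visible to the remapping
 * hardware: flush the CPU caches if the unit does not snoop coherently
 * (ECAP.C == 0) and, if the unit requires it (CAP.RWBF), issue a
 * write-buffer flush and wait for it to complete.
 */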
static void
vtd_wbflush(struct vtdmap *vtdmap)
{

	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
		pmap_invalidate_cache();

	if (VTD_CAP_RWBF(vtdmap->cap)) {
		vtdmap->gcr = VTD_GCR_WBF;
		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
			;
	}
}

static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{

	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
		;
}

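/*
 * Globally invalidate the IOTLB. The IOTLB registers are not part of
 * 'struct vtdmap'; they live at an implementation-specific offset given
 * by ECAP.IRO (in 16-byte units), with the IOTLB invalidate register in
 * the second 64-bit word of that block.
 */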
static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
	int offset;
	volatile uint64_t *iotlb_reg, val;

	vtd_wbflush(vtdmap);

	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);

	*iotlb_reg =  VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
		      VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

	while (1) {
		val = *iotlb_reg;
		if ((val & VTD_IIR_IVT) == 0)
			break;
	}
}

static void
vtd_translation_enable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = VTD_GCR_TE;
	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
		;
}

static void
vtd_translation_disable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = 0;
	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
		;
}

static int
vtd_init(void)
{
	int i, units, remaining;
	struct vtdmap *vtdmap;
	vm_paddr_t ctx_paddr;
	char *end, envname[32];
	unsigned long mapaddr;
	ACPI_STATUS status;
	ACPI_TABLE_DMAR *dmar;
	ACPI_DMAR_HEADER *hdr;
	ACPI_DMAR_HARDWARE_UNIT *drhd;

	/*
	 * Allow the user to override the ACPI DMAR table by specifying the
	 * physical address of each remapping unit.
	 *
	 * The following example specifies two remapping units at
	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
	 * set vtd.regmap.0.addr=0xfed90000
	 * set vtd.regmap.1.addr=0xfeda0000
	 */
	for (units = 0; units < DRHD_MAX_UNITS; units++) {
		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
		if (getenv_ulong(envname, &mapaddr) == 0)
			break;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
	}

	if (units > 0)
		goto skip_dmar;

	/* Search for DMAR table. */
	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
	if (ACPI_FAILURE(status))
		return (ENXIO);

	end = (char *)dmar + dmar->Header.Length;
	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
		if (hdr->Length > remaining)
			break;
		/*
		 * From Intel VT-d arch spec, version 1.3:
		 * BIOS implementations must report remapping structures
		 * in numerical order, i.e. all remapping structures of
		 * type 0 (DRHD) are enumerated before remapping structures
		 * of type 1 (RMRR), and so forth.
		 */
		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
			break;

		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
		vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
		if (units >= DRHD_MAX_UNITS)
			break;
		remaining -= hdr->Length;
	}

	if (units <= 0)
		return (ENXIO);

skip_dmar:
	drhd_num = units;
	vtdmap = vtdmaps[0];

	if (VTD_CAP_CM(vtdmap->cap) != 0)
		panic("vtd_init: invalid caching mode");

	max_domains = vtd_max_domains(vtdmap);

	/*
	 * Set up the root-table to point to the context-entry tables
	 */
	for (i = 0; i < 256; i++) {
		ctx_paddr = vtophys(ctx_tables[i]);
		if (ctx_paddr & PAGE_MASK)
			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);

		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
	}

	return (0);
}

static void
vtd_cleanup(void)
{
}

static void
vtd_enable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_wbflush(vtdmap);

		/* Update the root table address */
		vtdmap->rta = vtophys(root_table);
		vtdmap->gcr = VTD_GCR_SRTP;
		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
			;

		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);

		vtd_translation_enable(vtdmap);
	}
}

static void
vtd_disable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_translation_disable(vtdmap);
	}
}

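/*
 * Add a device, identified by its PCI requester ID, to an iommu domain.
 * The low 64-bit word of the 128-bit context entry holds the page table
 * root, the translation type and the present bit; the high word holds
 * the address width and the domain id.
 */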
static void
vtd_add_device(void *arg, uint16_t rid)
{
	int idx;
	uint64_t *ctxp;
	struct domain *dom = arg;
	vm_paddr_t pt_paddr;
	struct vtdmap *vtdmap;
	uint8_t bus;

	vtdmap = vtdmaps[0];
	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	pt_paddr = vtophys(dom->ptp);
	idx = VTD_RID2IDX(rid);

	if (ctxp[idx] & VTD_CTX_PRESENT) {
		panic("vtd_add_device: device %x is already owned by "
		      "domain %d", rid,
		      (uint16_t)(ctxp[idx + 1] >> 8));
	}

	/*
	 * Order is important. The 'present' bit is set only after all fields
	 * of the context pointer are initialized.
	 */
	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);

	if (VTD_ECAP_DI(vtdmap->ext_cap))
		ctxp[idx] = VTD_CTX_TT_ALL;
	else
		ctxp[idx] = 0;

	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;

	/*
	 * 'Not Present' entries are not cached in either the Context Cache
	 * or in the IOTLB, so there is no need to invalidate either of them.
	 */
}

static void
vtd_remove_device(void *arg, uint16_t rid)
{
	int i, idx;
	uint64_t *ctxp;
	struct vtdmap *vtdmap;
	uint8_t bus;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	idx = VTD_RID2IDX(rid);

	/*
	 * Order is important. The 'present' bit must be cleared first.
	 */
	ctxp[idx] = 0;
	ctxp[idx + 1] = 0;

	/*
	 * Invalidate the Context Cache and the IOTLB.
	 *
	 * XXX use device-selective invalidation for Context Cache
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

#define	CREATE_MAPPING	0
#define	REMOVE_MAPPING	1

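/*
 * Create or remove a single leaf mapping for 'gpa'. Each page table level
 * resolves 9 bits of the address, so a leaf entry installed at level
 * 'nlevels' covers 1 << (12 + nlevels * 9) bytes. The size of the mapping
 * that was updated is returned.
 */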
static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
		   int remove)
{
	struct domain *dom;
	int i, spshift, ptpshift, ptpindex, nlevels;
	uint64_t spsize, *ptp;

	dom = arg;
	ptpindex = 0;
	ptpshift = 0;

	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
	    gpa, len));
	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));

	if (gpa & PAGE_MASK)
		panic("vtd_update_mapping: unaligned gpa 0x%0lx", gpa);

	if (hpa & PAGE_MASK)
		panic("vtd_update_mapping: unaligned hpa 0x%0lx", hpa);

	if (len & PAGE_MASK)
		panic("vtd_update_mapping: unaligned len 0x%0lx", len);

	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - supported super page size
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'len'
	 */
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}

	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* We have reached the leaf mapping */
		if (spshift >= ptpshift) {
			break;
		}

		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create a downstream page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
			ptp[ptpindex] = vtophys(nlp) | VTD_PTE_RD | VTD_PTE_WR;
		}

		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}

	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

	/*
	 * Update the 'gpa' -> 'hpa' mapping
	 */
	if (remove) {
		ptp[ptpindex] = 0;
	} else {
		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;

		if (nlevels > 0)
			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	}

	return (1UL << ptpshift);
}

static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
}

static uint64_t
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
}

static void
vtd_invalidate_tlb(void *dom)
{
	int i;
	struct vtdmap *vtdmap;

	/*
	 * Invalidate the IOTLB.
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;

	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");

	vtdmap = vtdmaps[0];

	/*
	 * Calculate AGAW.
	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
	 */
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;

	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;

	if (agaw > 64)
		agaw = 64;

	/*
	 * Select the smallest Supported AGAW and the corresponding number
	 * of page table levels.
	 */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;
	tmp = VTD_CAP_SAGAW(vtdmap->cap);
	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}

	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
	}

	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);

#ifdef notyet
	/*
	 * XXX superpage mappings for the iommu do not work correctly.
	 *
	 * By default all physical memory is mapped into the host_domain.
	 * When a VM is allocated wired memory, the pages belonging to it
	 * are removed from the host_domain and added to the vm's domain.
	 *
	 * If the page being removed was mapped using a superpage mapping
	 * in the host_domain then we need to demote the mapping before
	 * removing the page.
	 *
	 * There is no code to deal with the demotion at the moment, so
	 * we disable superpage mappings altogether.
	 */
	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
#endif

	SLIST_INSERT_HEAD(&domhead, dom, next);

	return (dom);
}

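/*
 * Recursively free a page table hierarchy. Superpage entries and entries
 * that are not present (neither readable nor writable) have no downstream
 * page table page to visit.
 */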
static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	int i;
	uint64_t *nlp;

	if (level > 1) {
		for (i = 0; i < 512; i++) {
			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;
			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
				continue;
			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
			vtd_free_ptp(nlp, level - 1);
		}
	}

	bzero(ptp, PAGE_SIZE);
	free(ptp, M_VTD);
}

static void
vtd_destroy_domain(void *arg)
{
	struct domain *dom;

	dom = arg;

	SLIST_REMOVE(&domhead, dom, domain, next);
	vtd_free_ptp(dom->ptp, dom->pt_levels);
	free(dom, M_VTD);
}

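/* Ops vector plugged into the generic vmm iommu code (see io/iommu.h). */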
struct iommu_ops iommu_ops_intel = {
	vtd_init,
	vtd_cleanup,
	vtd_enable,
	vtd_disable,
	vtd_create_domain,
	vtd_destroy_domain,
	vtd_create_mapping,
	vtd_remove_mapping,
	vtd_add_device,
	vtd_remove_device,
	vtd_invalidate_tlb,
};