/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>

static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
	struct sf_buf *sf;
	dmar_root_entry_t *re;
	vm_page_t ctxm;

	/*
	 * The allocated context page must be linked.
	 */
	ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
	if (ctxm != NULL)
		return;

	/*
	 * Page not present, allocate and link.  Note that another
	 * thread might execute this sequence in parallel.  This
	 * should be safe, because the context entries written by both
	 * threads are equal.
	 */
	TD_PREP_PINNED_ASSERT;
	ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
	    IOMMU_PGF_WAITOK);
	re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
	re += bus;
	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
	    VM_PAGE_TO_PHYS(ctxm)));
	dmar_flush_root_to_ram(dmar, re);
	iommu_unmap_pgtbl(sf);
	TD_PINNED_ASSERT;
}

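/*
 * Map the context-entry table page covering the bus of the given
 * context and return a pointer to the entry selected by the low eight
 * bits (devfn) of the rid.  The caller unmaps the page with
 * iommu_unmap_pgtbl(*sfp).
 */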
static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
	struct dmar_unit *dmar;
	dmar_ctx_entry_t *ctxp;

	dmar = CTX2DMAR(ctx);

	ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
	ctxp += ctx->context.rid & 0xff;
	return (ctxp);
}

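/*
 * Initialize the busdma tag limits of the context so that the device
 * owning the tag can address the whole domain address space.
 */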
static void
device_tag_init(struct dmar_ctx *ctx, device_t dev)
{
	struct dmar_domain *domain;
	bus_addr_t maxaddr;

	domain = CTX2DOM(ctx);
	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
	ctx->context.tag->common.boundary = 0;
	ctx->context.tag->common.lowaddr = maxaddr;
	ctx->context.tag->common.highaddr = maxaddr;
	ctx->context.tag->common.maxsize = maxaddr;
	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->context.tag->common.maxsegsz = maxaddr;
	ctx->context.tag->ctx = CTX2IOCTX(ctx);
	ctx->context.tag->owner = dev;
}

static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
    vm_page_t ctx_root)
{
	/*
	 * For an update due to a move, the store is not atomic.  It
	 * is possible that the DMAR reads the upper doubleword while
	 * the lower doubleword is not yet updated.  The domain id is
	 * stored in the upper doubleword, while the table pointer is
	 * in the lower.
	 *
	 * There is no good solution; for the same reason it is wrong
	 * to clear the P bit in the ctx entry for an update.
	 */
	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
	    domain->awlvl);
	if (ctx_root == NULL) {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
	} else {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
		    DMAR_CTX1_P);
	}
}

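/*
 * Fill the context entry (or, for a bus-wide context, every entry on
 * the bus) either in pass-through mode or pointing to the root of the
 * domain page table, then flush the entries to RAM.
 */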
static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
    int busno)
{
	struct dmar_unit *unit;
	struct dmar_domain *domain;
	vm_page_t ctx_root;
	int i;

	domain = CTX2DOM(ctx);
	unit = DOM2DMAR(domain);
	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
	    pci_get_function(ctx->context.tag->owner),
	    ctxp->ctx1, ctxp->ctx2));

	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
		KASSERT(domain->pgtbl_obj == NULL,
		    ("ctx %p non-null pgtbl_obj", ctx));
		ctx_root = NULL;
	} else {
		ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0,
		    IOMMU_PGF_NOALLOC);
	}

	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
		MPASS(!move);
		for (i = 0; i <= PCI_BUSMAX; i++) {
			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
		}
	} else {
		ctx_id_entry_init_one(ctxp, domain, ctx_root);
	}
	dmar_flush_ctx_to_ram(unit, ctxp);
}

static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
	int error;

	/*
	 * If the DMAR declares Caching Mode as Set, follow section
	 * 11.5 "Caching Mode Consideration" and do the (global)
	 * invalidation of the negative TLB entries.
	 */
	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
		return (0);
	if (dmar->qi_enabled) {
		dmar_qi_invalidate_ctx_glob_locked(dmar);
		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		return (0);
	}
	error = dmar_inv_ctx_glob(dmar);
	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
		error = dmar_inv_iotlb_glob(dmar);
	return (error);
}

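/*
 * Parse the RMRR entries reported by the BIOS for the device and map
 * them into the domain, so that the device keeps access to these
 * regions once translation is enabled.
 */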
static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
    int slot, int func, int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len)
{
	struct iommu_map_entries_tailq rmrr_entries;
	struct iommu_map_entry *entry, *entry1;
	vm_page_t *ma;
	iommu_gaddr_t start, end;
	vm_pindex_t size, i;
	int error, error1;

	if (!dmar_rmrr_enable)
		return (0);

	error = 0;
	TAILQ_INIT(&rmrr_entries);
	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
	    dev_path_len, &rmrr_entries);
	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
		/*
		 * The VT-d specification requires that the start of an
		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
		 * anything into the start and end fields.  Truncate
		 * and round as necessary.
		 *
		 * We also allow overlapping RMRR entries, see
		 * iommu_gas_alloc_region().
		 */
		start = entry->start;
		end = entry->end;
		if (bootverbose)
			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
			    domain->iodom.iommu->unit, bus, slot, func,
			    (uintmax_t)start, (uintmax_t)end);
		entry->start = trunc_page(start);
		entry->end = round_page(end);
		if (entry->start == entry->end) {
			/* Workaround for some AMI (?) BIOSes */
			if (bootverbose) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf("BIOS bug: dmar%d RMRR "
				    "region (%jx, %jx) corrected\n",
				    domain->iodom.iommu->unit, start, end);
			}
			entry->end += IOMMU_PAGE_SIZE * 0x20;
		}
		size = OFF_TO_IDX(entry->end - entry->start);
		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
		for (i = 0; i < size; i++) {
			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
			    VM_MEMATTR_DEFAULT);
		}
		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
		/*
		 * Successfully mapped RMRR entries are owned by the
		 * context rb tree.  Get rid of a failed entry, but do
		 * not stop the loop.  The rest of the parsed RMRR
		 * entries are loaded and removed on context
		 * destruction.
		 */
		if (error1 == 0 && entry->end != entry->start) {
			IOMMU_LOCK(domain->iodom.iommu);
			domain->refs++; /* XXXKIB prevent free */
			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
			IOMMU_UNLOCK(domain->iodom.iommu);
		} else {
			if (error1 != 0) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf(
			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
				    domain->iodom.iommu->unit, start, end,
				    error1);
				error = error1;
			}
			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
			iommu_gas_free_entry(entry);
		}
		for (i = 0; i < size; i++)
			vm_page_putfake(ma[i]);
		free(ma, M_TEMP);
	}
	return (error);
}

/*
 * PCI memory address space is shared between memory-mapped devices (MMIO) and
 * host memory (which may be remapped by an IOMMU).  Device accesses to an
 * address within a memory aperture in a PCIe root port will be treated as
 * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
 * address space of the root port's memory apertures in the address space used
 * by the IOMMU for remapping.
 */
static int
dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
{
	struct iommu_domain *iodom;
	device_t root;
	uint32_t val;
	uint64_t base, limit;
	int error;

	iodom = DOM2IODOM(domain);

	root = pci_find_pcie_root_port(dev);
	if (root == NULL)
		return (0);

	/* Disable downstream memory */
	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
	if (bootverbose || error != 0)
		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
		    base, limit + 1, error);
	if (error != 0)
		return (error);

	/* Disable downstream prefetchable memory */
	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
			base = PCI_PPBMEMBASE(
			    pci_read_config(root, PCIR_PMBASEH_1, 4),
			    val);
			limit = PCI_PPBMEMLIMIT(
			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		} else {
			base = PCI_PPBMEMBASE(0, val);
			limit = PCI_PPBMEMLIMIT(0,
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		}
		error = iommu_gas_reserve_region_extend(iodom, base,
		    limit + 1);
		if (bootverbose || error != 0)
			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
			    "(error %d)\n", base, limit + 1, error);
		if (error != 0)
			return (error);
	}

	return (error);
}

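/*
 * Allocate a new domain: reserve a domain id, size the address width,
 * initialize the GAS, and set up either the identity-mapped page table
 * (unless hardware pass-through is used) or an empty page table with
 * the local APIC MSI range reserved.
 */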
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
	struct iommu_domain *iodom;
	struct iommu_unit *unit;
	struct dmar_domain *domain;
	int error, id, mgaw;

	id = alloc_unr(dmar->domids);
	if (id == -1)
		return (NULL);
	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
	iodom = DOM2IODOM(domain);
	unit = DMAR2IOMMU(dmar);
	domain->domain = id;
	LIST_INIT(&domain->contexts);
	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);

	domain->dmar = dmar;

	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw on an id_mapped
	 * domain.  It is useful for the identity mapping, and less so
	 * for the virtualized bus address space.
	 */
	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
	error = domain_set_agaw(domain, mgaw);
	if (error != 0)
		goto fail;
	if (!id_mapped)
		/* Use all supported address space for remapping. */
		domain->iodom.end = 1ULL << (domain->agaw - 1);

	iommu_gas_init_domain(DOM2IODOM(domain));

	if (id_mapped) {
		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
			    domain->iodom.end);
		}
		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
	} else {
		error = domain_alloc_pgtbl(domain);
		if (error != 0)
			goto fail;
		/* Disable local apic region access */
		error = iommu_gas_reserve_region(iodom, 0xfee00000,
		    0xfeefffff + 1, &iodom->msi_entry);
		if (error != 0)
			goto fail;
	}
	return (domain);

fail:
	dmar_domain_destroy(domain);
	return (NULL);
}

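/*
 * Allocate a context and its busdma tag; the caller links it into the
 * domain under the dmar lock.
 */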
static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
	struct dmar_ctx *ctx;

	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.domain = DOM2IODOM(domain);
	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
	    M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.rid = rid;
	ctx->refs = 1;
	return (ctx);
}

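/*
 * Link or unlink a context to or from its domain, keeping the domain
 * reference and context counts consistent.  Both require the dmar
 * lock.
 */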
static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs >= domain->ctx_cnt,
	    ("dom %p ref underflow %d %d", domain, domain->refs,
	    domain->ctx_cnt));
	domain->refs++;
	domain->ctx_cnt++;
	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
}

static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs > 0,
	    ("domain %p ctx dtr refs %d", domain, domain->refs));
	KASSERT(domain->ctx_cnt >= domain->refs,
	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
	    domain->refs, domain->ctx_cnt));
	domain->refs--;
	domain->ctx_cnt--;
	LIST_REMOVE(ctx, link);
}

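/*
 * Free a domain that has no contexts and no pending unload requests:
 * tear down the GAS and the page table, release the domain id and
 * free the memory.
 */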
static void
dmar_domain_destroy(struct dmar_domain *domain)
{
	struct iommu_domain *iodom;
	struct dmar_unit *dmar;

	iodom = DOM2IODOM(domain);

	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
	    ("unfinished unloads %p", domain));
	KASSERT(LIST_EMPTY(&domain->contexts),
	    ("destroying dom %p with contexts", domain));
	KASSERT(domain->ctx_cnt == 0,
	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
	KASSERT(domain->refs == 0,
	    ("destroying dom %p with refs %d", domain, domain->refs));
	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
		DMAR_DOMAIN_LOCK(domain);
		iommu_gas_fini_domain(iodom);
		DMAR_DOMAIN_UNLOCK(domain);
	}
	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
		if (domain->pgtbl_obj != NULL)
			DMAR_DOMAIN_PGLOCK(domain);
		domain_free_pgtbl(domain);
	}
	iommu_domain_fini(iodom);
	dmar = DOM2DMAR(domain);
	free_unr(dmar->domids, domain->domain);
	free(domain, M_DMAR_DOMAIN);
}

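/*
 * Find an existing context for the rid or create a new one.  The
 * sleepable allocations (context page, domain, RMRR mappings) are
 * performed with the dmar lock dropped, and the context list is
 * rechecked after relocking.  Translation is enabled once the first
 * context on the unit is activated.
 */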
static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_domain *domain, *domain1;
	struct dmar_ctx *ctx, *ctx1;
	struct iommu_unit *unit __diagused;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int bus, slot, func, error;
	bool enable;

	if (dev != NULL) {
		bus = pci_get_bus(dev);
		slot = pci_get_slot(dev);
		func = pci_get_function(dev);
	} else {
		bus = PCI_RID2BUS(rid);
		slot = PCI_RID2SLOT(rid);
		func = PCI_RID2FUNC(rid);
	}
	enable = false;
	TD_PREP_PINNED_ASSERT;
	unit = DMAR2IOMMU(dmar);
	DMAR_LOCK(dmar);
	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
	    slot, func));
	ctx = dmar_find_ctx_locked(dmar, rid);
	error = 0;
	if (ctx == NULL) {
		/*
		 * Perform the allocations which require sleep or have
		 * a higher chance to succeed if sleeping is allowed.
		 */
		DMAR_UNLOCK(dmar);
		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
		domain1 = dmar_domain_alloc(dmar, id_mapped);
		if (domain1 == NULL) {
			TD_PINNED_ASSERT;
			return (NULL);
		}
		if (!id_mapped) {
			error = domain_init_rmrr(domain1, dev, bus,
			    slot, func, dev_domain, dev_busno, dev_path,
			    dev_path_len);
			if (error == 0 && dev != NULL)
				error = dmar_reserve_pci_regions(domain1, dev);
			if (error != 0) {
				dmar_domain_destroy(domain1);
				TD_PINNED_ASSERT;
				return (NULL);
			}
		}
		ctx1 = dmar_ctx_alloc(domain1, rid);
		ctxp = dmar_map_ctx_entry(ctx1, &sf);
		DMAR_LOCK(dmar);

		/*
		 * Recheck the contexts, another thread might have
		 * already allocated the needed one.
		 */
		ctx = dmar_find_ctx_locked(dmar, rid);
		if (ctx == NULL) {
			domain = domain1;
			ctx = ctx1;
			dmar_ctx_link(ctx);
			ctx->context.tag->owner = dev;
			device_tag_init(ctx, dev);

			/*
			 * If this is the first activated context for
			 * the DMAR unit, enable the translation after
			 * everything is set up.
			 */
			if (LIST_EMPTY(&dmar->domains))
				enable = true;
			LIST_INSERT_HEAD(&dmar->domains, domain, link);
			ctx_id_entry_init(ctx, ctxp, false, bus);
			if (dev != NULL) {
				device_printf(dev,
			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
				    "agaw %d %s-mapped\n",
				    dmar->iommu.unit, dmar->segment, bus, slot,
				    func, rid, domain->domain, domain->mgaw,
				    domain->agaw, id_mapped ? "id" : "re");
			}
			iommu_unmap_pgtbl(sf);
		} else {
			iommu_unmap_pgtbl(sf);
			dmar_domain_destroy(domain1);
			/* Nothing needs to be done to destroy ctx1. */
			free(ctx1, M_DMAR_CTX);
			domain = CTX2DOM(ctx);
			ctx->refs++; /* tag referenced us */
		}
	} else {
		domain = CTX2DOM(ctx);
		if (ctx->context.tag->owner == NULL)
			ctx->context.tag->owner = dev;
		ctx->refs++; /* tag referenced us */
	}

	error = dmar_flush_for_ctx_entry(dmar, enable);
	if (error != 0) {
		dmar_free_ctx_locked(dmar, ctx);
		TD_PINNED_ASSERT;
		return (NULL);
	}

	/*
	 * The dmar lock was potentially dropped between the check for
	 * the empty context list and now.  Recheck the state of
	 * GCMD_TE to avoid an unneeded command.
	 */
	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
		error = dmar_disable_protected_regions(dmar);
		if (error != 0)
			printf("dmar%d: Failed to disable protected regions\n",
			    dmar->iommu.unit);
		error = dmar_enable_translation(dmar);
		if (error == 0) {
			if (bootverbose) {
				printf("dmar%d: enabled translation\n",
				    dmar->iommu.unit);
			}
		} else {
			printf("dmar%d: enabling translation failed, "
			    "error %d\n", dmar->iommu.unit, error);
			dmar_free_ctx_locked(dmar, ctx);
			TD_PINNED_ASSERT;
			return (NULL);
		}
	}
	DMAR_UNLOCK(dmar);
	TD_PINNED_ASSERT;
	return (ctx);
}

struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	int dev_domain, dev_path_len, dev_busno;

	dev_domain = pci_get_domain(dev);
	dev_path_len = dmar_dev_depth(dev);
	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
    int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{

	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

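/*
 * Move a context to another domain on the same dmar unit, rewriting
 * the context entry in place.
 */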
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = CTX2DOM(ctx);
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->iodom.iommu->unit,
	    domain->iodom.iommu->unit));
	TD_PREP_PINNED_ASSERT;

	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	dmar_ctx_unlink(ctx);
	ctx->context.domain = &domain->iodom;
	dmar_ctx_link(ctx);
	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
	iommu_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If the flush failed, rolling back would not work either. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
	    "id" : "re");
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}

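/*
 * Drop a reference on the domain; the last reference destroys the
 * domain after the delayed unload task is drained.  The dmar lock is
 * dropped on return.
 */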
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
	    domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
	    domain->refs, domain->ctx_cnt));

	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	taskqueue_drain(dmar->iommu.delayed_taskqueue,
	    &domain->iodom.unload_task);
	dmar_domain_destroy(domain);
}

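/*
 * Drop a reference on the context; the last reference clears the
 * context entry, invalidates the caches, and releases the reference
 * on the owning domain.  Called with the dmar lock held, which is
 * dropped on return.
 */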
void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not the last one, only the dereference
	 * should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  The mapping of the context
	 * entries page could require sleeping, so unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Another thread might have referenced the context, in which
	 * case, again, only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		iommu_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	iommu_unmap_pgtbl(sf);
	domain = CTX2DOM(ctx);
	dmar_ctx_unlink(ctx);
	free(ctx->context.tag, M_DMAR_CTX);
	free(ctx, M_DMAR_CTX);
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}

void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;

	dmar = CTX2DMAR(ctx);
	DMAR_LOCK(dmar);
	dmar_free_ctx_locked(dmar, ctx);
}

/*
 * Returns with the domain locked.
 */
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
	struct dmar_domain *domain;
	struct dmar_ctx *ctx;

	DMAR_ASSERT_LOCKED(dmar);

	LIST_FOREACH(domain, &dmar->domains, link) {
		LIST_FOREACH(ctx, &domain->contexts, link) {
			if (ctx->context.rid == rid)
				return (ctx);
		}
	}
	return (NULL);
}

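/*
 * Return the range of a map entry to the GAS (as a region or as
 * space, depending on whether it is an RMRR entry) and optionally
 * free the entry itself.
 */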
void
dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * If the given value for "free" is true, then the caller must not be using
 * the entry's dmamap_link field.
 */
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;

	domain = IODOM2DOM(entry->domain);
	unit = DOM2DMAR(domain);

	/*
	 * If "free" is false, then the IOTLB invalidation must be performed
	 * synchronously.  Otherwise, the caller might free the entry before
	 * dmar_qi_task() is finished processing it.
	 */
	if (unit->qi_enabled) {
		if (free) {
			DMAR_LOCK(unit);
			dmar_qi_invalidate_locked(domain, entry, true);
			DMAR_UNLOCK(unit);
		} else {
			dmar_qi_invalidate_sync(domain, entry->start,
			    entry->end - entry->start, cansleep);
			dmar_domain_free_entry(entry, false);
		}
	} else {
		domain_flush_iotlb_sync(domain, entry->start, entry->end -
		    entry->start);
		dmar_domain_free_entry(entry, free);
	}
}

static bool
dmar_domain_unload_emit_wait(struct dmar_domain *domain,
    struct iommu_map_entry *entry)
{

	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
		return (true);
	return (domain->batch_no++ % dmar_batch_coalesce == 0);
}

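/*
 * Unmap a batch of entries from the domain and invalidate the IOTLB,
 * either synchronously or through the queued-invalidation interface.
 */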
void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;
	struct iommu_map_entry *entry, *entry1;
	int error __diagused;

	domain = IODOM2DOM(iodom);
	unit = DOM2DMAR(domain);

	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("not mapped entry %p %p", domain, entry));
		error = iodom->ops->unmap(iodom, entry->start, entry->end -
		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
		KASSERT(error == 0, ("unmap %p error %d", domain, error));
		if (!unit->qi_enabled) {
			domain_flush_iotlb_sync(domain, entry->start,
			    entry->end - entry->start);
			TAILQ_REMOVE(entries, entry, dmamap_link);
			dmar_domain_free_entry(entry, true);
		}
	}
	if (TAILQ_EMPTY(entries))
		return;

	KASSERT(unit->qi_enabled, ("loaded entry left"));
	DMAR_LOCK(unit);
	while ((entry = TAILQ_FIRST(entries)) != NULL) {
		TAILQ_REMOVE(entries, entry, dmamap_link);
		dmar_qi_invalidate_locked(domain, entry,
		    dmar_domain_unload_emit_wait(domain, entry));
	}
	DMAR_UNLOCK(unit);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ret;

	dmar = IOMMU2DMAR(iommu);

	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);

	return (CTX2IOCTX(ret));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;

	dmar = IOMMU2DMAR(iommu);
	ctx = IOCTX2CTX(context);

	dmar_free_ctx_locked(dmar, ctx);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
	struct dmar_ctx *ctx;

	ctx = IOCTX2CTX(context);

	dmar_free_ctx(ctx);
}