1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
8 * under sponsorship from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/malloc.h>
38#include <sys/bus.h>
39#include <sys/interrupt.h>
40#include <sys/kernel.h>
41#include <sys/ktr.h>
42#include <sys/limits.h>
43#include <sys/lock.h>
44#include <sys/memdesc.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/rwlock.h>
48#include <sys/rman.h>
49#include <sys/sysctl.h>
50#include <sys/taskqueue.h>
51#include <sys/tree.h>
52#include <sys/uio.h>
53#include <sys/vmem.h>
54#include <vm/vm.h>
55#include <vm/vm_extern.h>
56#include <vm/vm_kern.h>
57#include <vm/vm_object.h>
58#include <vm/vm_page.h>
59#include <vm/vm_pager.h>
60#include <vm/vm_map.h>
61#include <contrib/dev/acpica/include/acpi.h>
62#include <contrib/dev/acpica/include/accommon.h>
63#include <dev/pci/pcireg.h>
64#include <dev/pci/pcivar.h>
65#include <machine/atomic.h>
66#include <machine/bus.h>
67#include <machine/md_var.h>
68#include <machine/specialreg.h>
69#include <x86/include/busdma_impl.h>
70#include <dev/iommu/busdma_iommu.h>
71#include <x86/iommu/intel_reg.h>
72#include <x86/iommu/intel_dmar.h>
73
/*
 * Malloc types used in this file: M_DMAR_CTX covers the context
 * structures and their DMA tags, M_DMAR_DOMAIN covers the domains.
 */
static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

/* Forward declarations for helpers that are used before they are defined. */
static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);
80
81static void
82dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
83{
84	struct sf_buf *sf;
85	dmar_root_entry_t *re;
86	vm_page_t ctxm;
87
88	/*
89	 * Allocated context page must be linked.
90	 */
91	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
92	if (ctxm != NULL)
93		return;
94
95	/*
96	 * Page not present, allocate and link.  Note that other
97	 * thread might execute this sequence in parallel.  This
98	 * should be safe, because the context entries written by both
99	 * threads are equal.
100	 */
101	TD_PREP_PINNED_ASSERT;
102	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
103	    IOMMU_PGF_WAITOK);
104	re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
105	re += bus;
106	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
107	    VM_PAGE_TO_PHYS(ctxm)));
108	dmar_flush_root_to_ram(dmar, re);
109	dmar_unmap_pgtbl(sf);
110	TD_PINNED_ASSERT;
111}
112
113static dmar_ctx_entry_t *
114dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
115{
116	struct dmar_unit *dmar;
117	dmar_ctx_entry_t *ctxp;
118
119	dmar = CTX2DMAR(ctx);
120
121	ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
122	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
123	ctxp += ctx->context.rid & 0xff;
124	return (ctxp);
125}
126
127static void
128device_tag_init(struct dmar_ctx *ctx, device_t dev)
129{
130	struct dmar_domain *domain;
131	bus_addr_t maxaddr;
132
133	domain = CTX2DOM(ctx);
134	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
135	ctx->context.tag->common.ref_count = 1; /* Prevent free */
136	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
137	ctx->context.tag->common.boundary = 0;
138	ctx->context.tag->common.lowaddr = maxaddr;
139	ctx->context.tag->common.highaddr = maxaddr;
140	ctx->context.tag->common.maxsize = maxaddr;
141	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
142	ctx->context.tag->common.maxsegsz = maxaddr;
143	ctx->context.tag->ctx = CTX2IOCTX(ctx);
144	ctx->context.tag->owner = dev;
145}
146
147static void
148ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
149    vm_page_t ctx_root)
150{
151	/*
152	 * For update due to move, the store is not atomic.  It is
153	 * possible that DMAR read upper doubleword, while low
154	 * doubleword is not yet updated.  The domain id is stored in
155	 * the upper doubleword, while the table pointer in the lower.
156	 *
157	 * There is no good solution, for the same reason it is wrong
158	 * to clear P bit in the ctx entry for update.
159	 */
160	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
161	    domain->awlvl);
162	if (ctx_root == NULL) {
163		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
164	} else {
165		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
166		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
167		    DMAR_CTX1_P);
168	}
169}
170
171static void
172ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
173    int busno)
174{
175	struct dmar_unit *unit;
176	struct dmar_domain *domain;
177	vm_page_t ctx_root;
178	int i;
179
180	domain = CTX2DOM(ctx);
181	unit = DOM2DMAR(domain);
182	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
183	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
184	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
185	    pci_get_function(ctx->context.tag->owner),
186	    ctxp->ctx1, ctxp->ctx2));
187
188	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
189	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
190		KASSERT(domain->pgtbl_obj == NULL,
191		    ("ctx %p non-null pgtbl_obj", ctx));
192		ctx_root = NULL;
193	} else {
194		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
195		    IOMMU_PGF_NOALLOC);
196	}
197
198	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
199		MPASS(!move);
200		for (i = 0; i <= PCI_BUSMAX; i++) {
201			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
202		}
203	} else {
204		ctx_id_entry_init_one(ctxp, domain, ctx_root);
205	}
206	dmar_flush_ctx_to_ram(unit, ctxp);
207}
208
209static int
210dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
211{
212	int error;
213
214	/*
215	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
216	 * Mode Consideration" and do the (global) invalidation of the
217	 * negative TLB entries.
218	 */
219	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
220		return (0);
221	if (dmar->qi_enabled) {
222		dmar_qi_invalidate_ctx_glob_locked(dmar);
223		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
224			dmar_qi_invalidate_iotlb_glob_locked(dmar);
225		return (0);
226	}
227	error = dmar_inv_ctx_glob(dmar);
228	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
229		error = dmar_inv_iotlb_glob(dmar);
230	return (error);
231}
232
233static int
234domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
235    int slot, int func, int dev_domain, int dev_busno,
236    const void *dev_path, int dev_path_len)
237{
238	struct iommu_map_entries_tailq rmrr_entries;
239	struct iommu_map_entry *entry, *entry1;
240	vm_page_t *ma;
241	iommu_gaddr_t start, end;
242	vm_pindex_t size, i;
243	int error, error1;
244
245	error = 0;
246	TAILQ_INIT(&rmrr_entries);
247	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
248	    dev_path_len, &rmrr_entries);
249	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
250		/*
251		 * VT-d specification requires that the start of an
252		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
253		 * anything into the start and end fields.  Truncate
254		 * and round as neccesary.
255		 *
256		 * We also allow the overlapping RMRR entries, see
257		 * iommu_gas_alloc_region().
258		 */
259		start = entry->start;
260		end = entry->end;
261		if (bootverbose)
262			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
263			    domain->iodom.iommu->unit, bus, slot, func,
264			    (uintmax_t)start, (uintmax_t)end);
265		entry->start = trunc_page(start);
266		entry->end = round_page(end);
267		if (entry->start == entry->end) {
268			/* Workaround for some AMI (?) BIOSes */
269			if (bootverbose) {
270				if (dev != NULL)
271					device_printf(dev, "");
272				printf("pci%d:%d:%d ", bus, slot, func);
273				printf("BIOS bug: dmar%d RMRR "
274				    "region (%jx, %jx) corrected\n",
275				    domain->iodom.iommu->unit, start, end);
276			}
277			entry->end += DMAR_PAGE_SIZE * 0x20;
278		}
279		size = OFF_TO_IDX(entry->end - entry->start);
280		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
281		for (i = 0; i < size; i++) {
282			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
283			    VM_MEMATTR_DEFAULT);
284		}
285		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
286		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
287		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
288		/*
289		 * Non-failed RMRR entries are owned by context rb
290		 * tree.  Get rid of the failed entry, but do not stop
291		 * the loop.  Rest of the parsed RMRR entries are
292		 * loaded and removed on the context destruction.
293		 */
294		if (error1 == 0 && entry->end != entry->start) {
295			IOMMU_LOCK(domain->iodom.iommu);
296			domain->refs++; /* XXXKIB prevent free */
297			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
298			IOMMU_UNLOCK(domain->iodom.iommu);
299		} else {
300			if (error1 != 0) {
301				if (dev != NULL)
302					device_printf(dev, "");
303				printf("pci%d:%d:%d ", bus, slot, func);
304				printf(
305			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
306				    domain->iodom.iommu->unit, start, end,
307				    error1);
308				error = error1;
309			}
310			TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
311			iommu_gas_free_entry(DOM2IODOM(domain), entry);
312		}
313		for (i = 0; i < size; i++)
314			vm_page_putfake(ma[i]);
315		free(ma, M_TEMP);
316	}
317	return (error);
318}
319
320/*
321 * PCI memory address space is shared between memory-mapped devices (MMIO) and
322 * host memory (which may be remapped by an IOMMU).  Device accesses to an
323 * address within a memory aperture in a PCIe root port will be treated as
324 * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
325 * address space of the root port's memory apertures in the address space used
326 * by the IOMMU for remapping.
327 */
328static int
329dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
330{
331	struct iommu_domain *iodom;
332	device_t root;
333	uint32_t val;
334	uint64_t base, limit;
335	int error;
336
337	iodom = DOM2IODOM(domain);
338
339	root = pci_find_pcie_root_port(dev);
340	if (root == NULL)
341		return (0);
342
343	/* Disable downstream memory */
344	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
345	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
346	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
347	if (bootverbose || error != 0)
348		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
349		    base, limit + 1, error);
350	if (error != 0)
351		return (error);
352
353	/* Disable downstream prefetchable memory */
354	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
355	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
356		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
357			base = PCI_PPBMEMBASE(
358			    pci_read_config(root, PCIR_PMBASEH_1, 4),
359			    val);
360			limit = PCI_PPBMEMLIMIT(
361			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
362			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
363		} else {
364			base = PCI_PPBMEMBASE(0, val);
365			limit = PCI_PPBMEMLIMIT(0,
366			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
367		}
368		error = iommu_gas_reserve_region_extend(iodom, base,
369		    limit + 1);
370		if (bootverbose || error != 0)
371			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
372			    "(error %d)\n", base, limit + 1, error);
373		if (error != 0)
374			return (error);
375	}
376
377	return (error);
378}
379
380static struct dmar_domain *
381dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
382{
383	struct iommu_domain *iodom;
384	struct iommu_unit *unit;
385	struct dmar_domain *domain;
386	int error, id, mgaw;
387
388	id = alloc_unr(dmar->domids);
389	if (id == -1)
390		return (NULL);
391	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
392	iodom = DOM2IODOM(domain);
393	unit = DMAR2IOMMU(dmar);
394	domain->domain = id;
395	LIST_INIT(&domain->contexts);
396	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);
397
398	domain->dmar = dmar;
399
400	/*
401	 * For now, use the maximal usable physical address of the
402	 * installed memory to calculate the mgaw on id_mapped domain.
403	 * It is useful for the identity mapping, and less so for the
404	 * virtualized bus address space.
405	 */
406	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
407	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
408	error = domain_set_agaw(domain, mgaw);
409	if (error != 0)
410		goto fail;
411	if (!id_mapped)
412		/* Use all supported address space for remapping. */
413		domain->iodom.end = 1ULL << (domain->agaw - 1);
414
415	iommu_gas_init_domain(DOM2IODOM(domain));
416
417	if (id_mapped) {
418		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
419			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
420			    domain->iodom.end);
421		}
422		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
423	} else {
424		error = domain_alloc_pgtbl(domain);
425		if (error != 0)
426			goto fail;
427		/* Disable local apic region access */
428		error = iommu_gas_reserve_region(iodom, 0xfee00000,
429		    0xfeefffff + 1, &iodom->msi_entry);
430		if (error != 0)
431			goto fail;
432	}
433	return (domain);
434
435fail:
436	dmar_domain_destroy(domain);
437	return (NULL);
438}
439
440static struct dmar_ctx *
441dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
442{
443	struct dmar_ctx *ctx;
444
445	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
446	ctx->context.domain = DOM2IODOM(domain);
447	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
448	    M_DMAR_CTX, M_WAITOK | M_ZERO);
449	ctx->context.rid = rid;
450	ctx->refs = 1;
451	return (ctx);
452}
453
454static void
455dmar_ctx_link(struct dmar_ctx *ctx)
456{
457	struct dmar_domain *domain;
458
459	domain = CTX2DOM(ctx);
460	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
461	KASSERT(domain->refs >= domain->ctx_cnt,
462	    ("dom %p ref underflow %d %d", domain, domain->refs,
463	    domain->ctx_cnt));
464	domain->refs++;
465	domain->ctx_cnt++;
466	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
467}
468
469static void
470dmar_ctx_unlink(struct dmar_ctx *ctx)
471{
472	struct dmar_domain *domain;
473
474	domain = CTX2DOM(ctx);
475	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
476	KASSERT(domain->refs > 0,
477	    ("domain %p ctx dtr refs %d", domain, domain->refs));
478	KASSERT(domain->ctx_cnt >= domain->refs,
479	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
480	    domain->refs, domain->ctx_cnt));
481	domain->refs--;
482	domain->ctx_cnt--;
483	LIST_REMOVE(ctx, link);
484}
485
486static void
487dmar_domain_destroy(struct dmar_domain *domain)
488{
489	struct iommu_domain *iodom;
490	struct dmar_unit *dmar;
491
492	iodom = DOM2IODOM(domain);
493
494	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
495	    ("unfinished unloads %p", domain));
496	KASSERT(LIST_EMPTY(&domain->contexts),
497	    ("destroying dom %p with contexts", domain));
498	KASSERT(domain->ctx_cnt == 0,
499	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
500	KASSERT(domain->refs == 0,
501	    ("destroying dom %p with refs %d", domain, domain->refs));
502	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
503		DMAR_DOMAIN_LOCK(domain);
504		iommu_gas_fini_domain(iodom);
505		DMAR_DOMAIN_UNLOCK(domain);
506	}
507	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
508		if (domain->pgtbl_obj != NULL)
509			DMAR_DOMAIN_PGLOCK(domain);
510		domain_free_pgtbl(domain);
511	}
512	iommu_domain_fini(iodom);
513	dmar = DOM2DMAR(domain);
514	free_unr(dmar->domids, domain->domain);
515	free(domain, M_DMAR_DOMAIN);
516}
517
518static struct dmar_ctx *
519dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
520    int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
521    bool id_mapped, bool rmrr_init)
522{
523	struct dmar_domain *domain, *domain1;
524	struct dmar_ctx *ctx, *ctx1;
525	struct iommu_unit *unit;
526	dmar_ctx_entry_t *ctxp;
527	struct sf_buf *sf;
528	int bus, slot, func, error;
529	bool enable;
530
531	if (dev != NULL) {
532		bus = pci_get_bus(dev);
533		slot = pci_get_slot(dev);
534		func = pci_get_function(dev);
535	} else {
536		bus = PCI_RID2BUS(rid);
537		slot = PCI_RID2SLOT(rid);
538		func = PCI_RID2FUNC(rid);
539	}
540	enable = false;
541	TD_PREP_PINNED_ASSERT;
542	unit = DMAR2IOMMU(dmar);
543	DMAR_LOCK(dmar);
544	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
545	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
546	    slot, func));
547	ctx = dmar_find_ctx_locked(dmar, rid);
548	error = 0;
549	if (ctx == NULL) {
550		/*
551		 * Perform the allocations which require sleep or have
552		 * higher chance to succeed if the sleep is allowed.
553		 */
554		DMAR_UNLOCK(dmar);
555		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
556		domain1 = dmar_domain_alloc(dmar, id_mapped);
557		if (domain1 == NULL) {
558			TD_PINNED_ASSERT;
559			return (NULL);
560		}
561		if (!id_mapped) {
562			error = domain_init_rmrr(domain1, dev, bus,
563			    slot, func, dev_domain, dev_busno, dev_path,
564			    dev_path_len);
565			if (error == 0)
566				error = dmar_reserve_pci_regions(domain1, dev);
567			if (error != 0) {
568				dmar_domain_destroy(domain1);
569				TD_PINNED_ASSERT;
570				return (NULL);
571			}
572		}
573		ctx1 = dmar_ctx_alloc(domain1, rid);
574		ctxp = dmar_map_ctx_entry(ctx1, &sf);
575		DMAR_LOCK(dmar);
576
577		/*
578		 * Recheck the contexts, other thread might have
579		 * already allocated needed one.
580		 */
581		ctx = dmar_find_ctx_locked(dmar, rid);
582		if (ctx == NULL) {
583			domain = domain1;
584			ctx = ctx1;
585			dmar_ctx_link(ctx);
586			ctx->context.tag->owner = dev;
587			device_tag_init(ctx, dev);
588
589			/*
590			 * This is the first activated context for the
591			 * DMAR unit.  Enable the translation after
592			 * everything is set up.
593			 */
594			if (LIST_EMPTY(&dmar->domains))
595				enable = true;
596			LIST_INSERT_HEAD(&dmar->domains, domain, link);
597			ctx_id_entry_init(ctx, ctxp, false, bus);
598			if (dev != NULL) {
599				device_printf(dev,
600			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
601				    "agaw %d %s-mapped\n",
602				    dmar->iommu.unit, dmar->segment, bus, slot,
603				    func, rid, domain->domain, domain->mgaw,
604				    domain->agaw, id_mapped ? "id" : "re");
605			}
606			dmar_unmap_pgtbl(sf);
607		} else {
608			dmar_unmap_pgtbl(sf);
609			dmar_domain_destroy(domain1);
610			/* Nothing needs to be done to destroy ctx1. */
611			free(ctx1, M_DMAR_CTX);
612			domain = CTX2DOM(ctx);
613			ctx->refs++; /* tag referenced us */
614		}
615	} else {
616		domain = CTX2DOM(ctx);
617		if (ctx->context.tag->owner == NULL)
618			ctx->context.tag->owner = dev;
619		ctx->refs++; /* tag referenced us */
620	}
621
622	error = dmar_flush_for_ctx_entry(dmar, enable);
623	if (error != 0) {
624		dmar_free_ctx_locked(dmar, ctx);
625		TD_PINNED_ASSERT;
626		return (NULL);
627	}
628
629	/*
630	 * The dmar lock was potentially dropped between check for the
631	 * empty context list and now.  Recheck the state of GCMD_TE
632	 * to avoid unneeded command.
633	 */
634	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
635		error = dmar_enable_translation(dmar);
636		if (error == 0) {
637			if (bootverbose) {
638				printf("dmar%d: enabled translation\n",
639				    dmar->iommu.unit);
640			}
641		} else {
642			printf("dmar%d: enabling translation failed, "
643			    "error %d\n", dmar->iommu.unit, error);
644			dmar_free_ctx_locked(dmar, ctx);
645			TD_PINNED_ASSERT;
646			return (NULL);
647		}
648	}
649	DMAR_UNLOCK(dmar);
650	TD_PINNED_ASSERT;
651	return (ctx);
652}
653
654struct dmar_ctx *
655dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
656    bool id_mapped, bool rmrr_init)
657{
658	int dev_domain, dev_path_len, dev_busno;
659
660	dev_domain = pci_get_domain(dev);
661	dev_path_len = dmar_dev_depth(dev);
662	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
663	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
664	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
665	    dev_path, dev_path_len, id_mapped, rmrr_init));
666}
667
/*
 * Get the context for a device which is known only by its request id
 * and device path.  Same as dmar_get_ctx_for_dev(), but no device_t
 * is passed down (dev is NULL).
 */
struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
    int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_ctx *ctx;

	ctx = dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init);
	return (ctx);
}
678
/*
 * Move an existing context to another domain of the same DMAR unit
 * and rewrite its hardware context entry to match.
 *
 * Returns 0 immediately when ctx already belongs to domain.
 * Otherwise returns the result of the forced context/IOTLB flush; the
 * move itself is not rolled back when the flush fails.
 *
 * The dmar lock is taken here and is released by
 * dmar_unref_domain_locked() on the way out.
 */
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = CTX2DOM(ctx);
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->iodom.iommu->unit,
	    domain->iodom.iommu->unit));
	TD_PREP_PINNED_ASSERT;

	/* Map the context entry before the dmar lock is taken. */
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	/* Re-home the context: unlink from the old domain, link to the new. */
	dmar_ctx_unlink(ctx);
	ctx->context.domain = &domain->iodom;
	dmar_ctx_link(ctx);
	/*
	 * NOTE(review): the bus number passed here is above PCI_BUSMAX,
	 * presumably so that the bus-wide check in ctx_id_entry_init()
	 * never matches for a move; confirm.
	 */
	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
	dmar_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If flush failed, rolling back would not work as well. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
	    "id" : "re");
	/* Drops the reference on the old domain and the dmar lock. */
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}
715
/*
 * Drop one reference on the domain.  Called with the dmar lock held,
 * returns with the lock released on every path.
 *
 * When the reference is not the last one, only the counter is
 * decremented.  Otherwise the domain is removed from the unit's
 * domain list, its delayed unload task is drained, and the domain is
 * destroyed.
 */
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
	    domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
	    domain->refs, domain->ctx_cnt));

	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	/*
	 * NOTE(review): refs is left at 1 on this path, while
	 * dmar_domain_destroy() asserts refs == 0; confirm which of
	 * the two is intended.
	 */
	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	/* Drain the unload task with the lock dropped, before the free. */
	taskqueue_drain(dmar->iommu.delayed_taskqueue,
	    &domain->iodom.unload_task);
	dmar_domain_destroy(domain);
}
744
/*
 * Drop one reference on the context.  Called with the dmar lock held,
 * returns with the lock released on every path.
 *
 * When the last reference is dropped, the hardware context entry is
 * cleared, the caches are invalidated, the context and its tag are
 * freed, and the reference which the context held on its domain is
 * released.
 */
void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not last, only the dereference should
	 * be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  The mapping of the context
	 * entries page could require sleep, unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Other thread might have referenced the context, in which
	 * case again only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		dmar_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 *
	 * NOTE(review): the context cache is invalidated through
	 * dmar_inv_ctx_glob() even when qi_enabled is set, unlike
	 * dmar_flush_for_ctx_entry() which uses the queued variant in
	 * that case; confirm that this is intended.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	dmar_unmap_pgtbl(sf);
	/* Remember the domain, ctx is freed before the domain is unref'ed. */
	domain = CTX2DOM(ctx);
	dmar_ctx_unlink(ctx);
	free(ctx->context.tag, M_DMAR_CTX);
	free(ctx, M_DMAR_CTX);
	/* Releases the dmar lock. */
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}
818
/*
 * Drop one reference on the context.  This is the unlocked entry
 * point: the dmar lock is taken here and released by
 * dmar_free_ctx_locked().
 */
void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *unit;

	unit = CTX2DMAR(ctx);

	DMAR_LOCK(unit);
	dmar_free_ctx_locked(unit, ctx);
}
828
829/*
830 * Returns with the domain locked.
831 */
832struct dmar_ctx *
833dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
834{
835	struct dmar_domain *domain;
836	struct dmar_ctx *ctx;
837
838	DMAR_ASSERT_LOCKED(dmar);
839
840	LIST_FOREACH(domain, &dmar->domains, link) {
841		LIST_FOREACH(ctx, &domain->contexts, link) {
842			if (ctx->context.rid == rid)
843				return (ctx);
844		}
845	}
846	return (NULL);
847}
848
849void
850dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
851{
852	struct iommu_domain *domain;
853
854	domain = entry->domain;
855	IOMMU_DOMAIN_LOCK(domain);
856	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
857		iommu_gas_free_region(domain, entry);
858	else
859		iommu_gas_free_space(domain, entry);
860	IOMMU_DOMAIN_UNLOCK(domain);
861	if (free)
862		iommu_gas_free_entry(domain, entry);
863	else
864		entry->flags = 0;
865}
866
867void
868dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free)
869{
870	struct dmar_domain *domain;
871	struct dmar_unit *unit;
872
873	domain = IODOM2DOM(entry->domain);
874	unit = DOM2DMAR(domain);
875	if (unit->qi_enabled) {
876		DMAR_LOCK(unit);
877		dmar_qi_invalidate_locked(IODOM2DOM(entry->domain),
878		    entry->start, entry->end - entry->start, &entry->gseq,
879		    true);
880		if (!free)
881			entry->flags |= IOMMU_MAP_ENTRY_QI_NF;
882		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
883		DMAR_UNLOCK(unit);
884	} else {
885		domain_flush_iotlb_sync(IODOM2DOM(entry->domain),
886		    entry->start, entry->end - entry->start);
887		dmar_domain_free_entry(entry, free);
888	}
889}
890
891static bool
892dmar_domain_unload_emit_wait(struct dmar_domain *domain,
893    struct iommu_map_entry *entry)
894{
895
896	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
897		return (true);
898	return (domain->batch_no++ % dmar_batch_coalesce == 0);
899}
900
901void
902dmar_domain_unload(struct dmar_domain *domain,
903    struct iommu_map_entries_tailq *entries, bool cansleep)
904{
905	struct dmar_unit *unit;
906	struct iommu_domain *iodom;
907	struct iommu_map_entry *entry, *entry1;
908	int error;
909
910	iodom = DOM2IODOM(domain);
911	unit = DOM2DMAR(domain);
912
913	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
914		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
915		    ("not mapped entry %p %p", domain, entry));
916		error = iodom->ops->unmap(iodom, entry->start, entry->end -
917		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
918		KASSERT(error == 0, ("unmap %p error %d", domain, error));
919		if (!unit->qi_enabled) {
920			domain_flush_iotlb_sync(domain, entry->start,
921			    entry->end - entry->start);
922			TAILQ_REMOVE(entries, entry, dmamap_link);
923			dmar_domain_free_entry(entry, true);
924		}
925	}
926	if (TAILQ_EMPTY(entries))
927		return;
928
929	KASSERT(unit->qi_enabled, ("loaded entry left"));
930	DMAR_LOCK(unit);
931	TAILQ_FOREACH(entry, entries, dmamap_link) {
932		dmar_qi_invalidate_locked(domain, entry->start, entry->end -
933		    entry->start, &entry->gseq,
934		    dmar_domain_unload_emit_wait(domain, entry));
935	}
936	TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
937	DMAR_UNLOCK(unit);
938}
939
940struct iommu_ctx *
941iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
942    bool id_mapped, bool rmrr_init)
943{
944	struct dmar_unit *dmar;
945	struct dmar_ctx *ret;
946
947	dmar = IOMMU2DMAR(iommu);
948
949	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
950
951	return (CTX2IOCTX(ret));
952}
953
/*
 * Generic iommu entry point for dmar_free_ctx_locked().  The callee
 * asserts that the unit lock is held on entry and releases the lock
 * before it returns.
 */
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{

	dmar_free_ctx_locked(IOMMU2DMAR(iommu), IOCTX2CTX(context));
}
965
/*
 * Generic iommu entry point for dmar_free_ctx(): drop one reference
 * on the context, with the locking done by the callee.
 */
void
iommu_free_ctx(struct iommu_ctx *context)
{

	dmar_free_ctx(IOCTX2CTX(context));
}
975
/*
 * Generic iommu entry point for dmar_domain_unload_entry(); both
 * arguments are passed through unchanged.
 */
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
{
	dmar_domain_unload_entry(entry, free);
}
982
/*
 * Generic iommu entry point for dmar_domain_unload(): convert the
 * generic domain to the DMAR domain and unload the entries from it.
 */
void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{

	dmar_domain_unload(IODOM2DOM(iodom), entries, cansleep);
}
993