/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/sys/x86/iommu/intel_ctx.c 298144 2016-04-17 10:56:56Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

static void dmar_domain_unload_task(void *arg, int pending);
static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

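/*
 * Descriptive summary (derived from the code below): ensure that the
 * root-entry slot for the given bus points at an allocated context
 * entry page, allocating and linking a zeroed page if needed.
 * Concurrent callers may race here; see the comment in the body for
 * why that is harmless.
 */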
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
	struct sf_buf *sf;
	dmar_root_entry_t *re;
	vm_page_t ctxm;

	/*
	 * The allocated context page must be linked.
	 */
	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
	if (ctxm != NULL)
		return;

	/*
	 * The page is not present; allocate and link it.  Note that
	 * another thread might execute this sequence in parallel.
	 * This should be safe, because the context entries written by
	 * both threads are equal.
	 */
	TD_PREP_PINNED_ASSERT;
	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
	    DMAR_PGF_WAITOK);
	re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
	re += bus;
	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
	    VM_PAGE_TO_PHYS(ctxm)));
	dmar_flush_root_to_ram(dmar, re);
	dmar_unmap_pgtbl(sf);
	TD_PINNED_ASSERT;
}

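/*
 * Descriptive summary (derived from the code below): map the page of
 * context entries covering the bus of the given rid and return a
 * pointer to the entry selected by the rid's device/function bits.
 * The sf_buf used for the mapping is returned through sfp and must be
 * released by the caller with dmar_unmap_pgtbl().
 */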
static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
	dmar_ctx_entry_t *ctxp;

	ctxp = dmar_map_pgtbl(ctx->domain->dmar->ctx_obj, 1 +
	    PCI_RID2BUS(ctx->rid), DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
	ctxp += ctx->rid & 0xff;
	return (ctxp);
}

static void
ctx_tag_init(struct dmar_ctx *ctx, device_t dev)
{
	bus_addr_t maxaddr;

	maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
	ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
	ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
	ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY;
	ctx->ctx_tag.common.lowaddr = maxaddr;
	ctx->ctx_tag.common.highaddr = maxaddr;
	ctx->ctx_tag.common.maxsize = maxaddr;
	ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->ctx_tag.common.maxsegsz = maxaddr;
	ctx->ctx_tag.ctx = ctx;
	ctx->ctx_tag.owner = dev;
}

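/*
 * Descriptive summary (derived from the code below): fill the context
 * entry for the context's rid.  The second doubleword receives the
 * domain id and address width level; the first receives either
 * pass-through translation (identity-mapped domain on hardware with
 * ECAP.PT) or the physical address of the domain page table root.
 * With move set, an already-valid entry is rewritten for a domain
 * change; see the comment below about the non-atomic update.
 */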
static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move)
{
	struct dmar_unit *unit;
	struct dmar_domain *domain;
	vm_page_t ctx_root;

	domain = ctx->domain;
	unit = domain->dmar;
	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
	    unit->unit, pci_get_bus(ctx->ctx_tag.owner),
	    pci_get_slot(ctx->ctx_tag.owner),
	    pci_get_function(ctx->ctx_tag.owner),
	    ctxp->ctx1, ctxp->ctx2));
	/*
	 * For an update due to a move, the store is not atomic.  It
	 * is possible that the DMAR reads the upper doubleword while
	 * the lower doubleword is not yet updated.  The domain id is
	 * stored in the upper doubleword, while the table pointer is
	 * in the lower.
	 *
	 * There is no good solution; for the same reason, it is wrong
	 * to clear the P bit in the ctx entry for the update.
	 */
	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
	    domain->awlvl);
	if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0 &&
	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
		KASSERT(domain->pgtbl_obj == NULL,
		    ("ctx %p non-null pgtbl_obj", ctx));
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
	} else {
		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
		    DMAR_CTX1_P);
	}
	dmar_flush_ctx_to_ram(unit, ctxp);
}

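/*
 * Descriptive summary (derived from the code below): after context
 * entries were modified, globally invalidate the context cache and,
 * when ECAP.DI is present or force is set, the IOTLB.  On hardware
 * without Caching Mode the invalidation is skipped unless force is
 * set.  Queued invalidation is used when enabled, otherwise the
 * register-based interface.
 */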
static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
	int error;

	/*
	 * If the dmar declares Caching Mode as set, follow 11.5
	 * "Caching Mode Consideration" and do the (global)
	 * invalidation of the negative TLB entries.
	 */
	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
		return (0);
	if (dmar->qi_enabled) {
		dmar_qi_invalidate_ctx_glob_locked(dmar);
		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		return (0);
	}
	error = dmar_inv_ctx_glob(dmar);
	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
		error = dmar_inv_iotlb_glob(dmar);
	return (error);
}

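/*
 * Descriptive summary (derived from the code below): pre-map the RMRR
 * regions that the BIOS reported for the device into the domain, so
 * that DMA to those ranges keeps working once translation is enabled.
 * Fake vm_page_t arrays describe the physical pages backing each
 * region.
 */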
static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev)
{
	struct dmar_map_entries_tailq rmrr_entries;
	struct dmar_map_entry *entry, *entry1;
	vm_page_t *ma;
	dmar_gaddr_t start, end;
	vm_pindex_t size, i;
	int error, error1;

	error = 0;
	TAILQ_INIT(&rmrr_entries);
	dmar_dev_parse_rmrr(domain, dev, &rmrr_entries);
	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
		/*
		 * The VT-d specification requires that the start of
		 * an RMRR entry is 4k-aligned.  Buggy BIOSes put
		 * anything into the start and end fields.  Truncate
		 * and round as necessary.
		 *
		 * We also allow overlapping RMRR entries, see
		 * dmar_gas_alloc_region().
		 */
		start = entry->start;
		end = entry->end;
		entry->start = trunc_page(start);
		entry->end = round_page(end);
		if (entry->start == entry->end) {
			/* Workaround for some AMI (?) BIOSes */
			if (bootverbose) {
				device_printf(dev, "BIOS bug: dmar%d RMRR "
				    "region (%jx, %jx) corrected\n",
				    domain->dmar->unit, start, end);
			}
			entry->end += DMAR_PAGE_SIZE * 0x20;
		}
		size = OFF_TO_IDX(entry->end - entry->start);
		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
		for (i = 0; i < size; i++) {
			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
			    VM_MEMATTR_DEFAULT);
		}
		error1 = dmar_gas_map_region(domain, entry,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    DMAR_GM_CANWAIT, ma);
		/*
		 * Non-failed RMRR entries are owned by the context rb
		 * tree.  Get rid of the failed entry, but do not stop
		 * the loop.  The rest of the parsed RMRR entries are
		 * loaded and removed on context destruction.
		 */
		if (error1 == 0 && entry->end != entry->start) {
			DMAR_LOCK(domain->dmar);
			domain->refs++; /* XXXKIB prevent free */
			domain->flags |= DMAR_DOMAIN_RMRR;
			DMAR_UNLOCK(domain->dmar);
		} else {
			if (error1 != 0) {
				device_printf(dev,
			    "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
				    domain->dmar->unit, start, end, error1);
				error = error1;
			}
			TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
			dmar_gas_free_entry(domain, entry);
		}
		for (i = 0; i < size; i++)
			vm_page_putfake(ma[i]);
		free(ma, M_TEMP);
	}
	return (error);
}

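/*
 * Descriptive summary (derived from the code below): allocate and
 * initialize a domain.  Reserve a domain id, size the address width
 * (agaw) from the chosen end address, then set up either the
 * identity-mapped page table or an empty page table with the local
 * APIC range reserved.  Returns NULL on failure.
 */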
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
	struct dmar_domain *domain;
	int error, id, mgaw;

	id = alloc_unr(dmar->domids);
	if (id == -1)
		return (NULL);
	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
	domain->domain = id;
	LIST_INIT(&domain->contexts);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	TASK_INIT(&domain->unload_task, 0, dmar_domain_unload_task, domain);
	mtx_init(&domain->lock, "dmardom", NULL, MTX_DEF);
	domain->dmar = dmar;

	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw on an id_mapped
	 * domain.  It is useful for the identity mapping, and less
	 * so for the virtualized bus address space.
	 */
	domain->end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
	mgaw = dmar_maxaddr2mgaw(dmar, domain->end, !id_mapped);
	error = domain_set_agaw(domain, mgaw);
	if (error != 0)
		goto fail;
	if (!id_mapped)
		/* Use all supported address space for remapping. */
		domain->end = 1ULL << (domain->agaw - 1);

	dmar_gas_init_domain(domain);

	if (id_mapped) {
		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
			    domain->end);
		}
		domain->flags |= DMAR_DOMAIN_IDMAP;
	} else {
		error = domain_alloc_pgtbl(domain);
		if (error != 0)
			goto fail;
		/* Disable local apic region access */
		error = dmar_gas_reserve_region(domain, 0xfee00000,
		    0xfeefffff + 1);
		if (error != 0)
			goto fail;
	}
	return (domain);

fail:
	dmar_domain_destroy(domain);
	return (NULL);
}

static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
	struct dmar_ctx *ctx;

	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->domain = domain;
	ctx->rid = rid;
	ctx->refs = 1;
	return (ctx);
}

static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = ctx->domain;
	DMAR_ASSERT_LOCKED(domain->dmar);
	KASSERT(domain->refs >= domain->ctx_cnt,
	    ("dom %p ref underflow %d %d", domain, domain->refs,
	    domain->ctx_cnt));
	domain->refs++;
	domain->ctx_cnt++;
	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
}

static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = ctx->domain;
	DMAR_ASSERT_LOCKED(domain->dmar);
	KASSERT(domain->refs > 0,
	    ("domain %p ctx dtr refs %d", domain, domain->refs));
	KASSERT(domain->ctx_cnt >= domain->refs,
	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
	    domain->refs, domain->ctx_cnt));
	domain->refs--;
	domain->ctx_cnt--;
	LIST_REMOVE(ctx, link);
}

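/*
 * Descriptive summary (derived from the code below): release all
 * resources of a domain that no longer has contexts or references:
 * the GAS state, the page table, the lock and the domain id.  The
 * KASSERTs enforce that the domain is really idle.
 */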
static void
dmar_domain_destroy(struct dmar_domain *domain)
{

	KASSERT(TAILQ_EMPTY(&domain->unload_entries),
	    ("unfinished unloads %p", domain));
	KASSERT(LIST_EMPTY(&domain->contexts),
	    ("destroying dom %p with contexts", domain));
	KASSERT(domain->ctx_cnt == 0,
	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
	KASSERT(domain->refs == 0,
	    ("destroying dom %p with refs %d", domain, domain->refs));
	if ((domain->flags & DMAR_DOMAIN_GAS_INITED) != 0) {
		DMAR_DOMAIN_LOCK(domain);
		dmar_gas_fini_domain(domain);
		DMAR_DOMAIN_UNLOCK(domain);
	}
	if ((domain->flags & DMAR_DOMAIN_PGTBL_INITED) != 0) {
		if (domain->pgtbl_obj != NULL)
			DMAR_DOMAIN_PGLOCK(domain);
		domain_free_pgtbl(domain);
	}
	mtx_destroy(&domain->lock);
	free_unr(domain->dmar->domids, domain->domain);
	free(domain, M_DMAR_DOMAIN);
}

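/*
 * Descriptive summary (derived from the code below): find or create
 * the context for the device identified by rid on the given dmar
 * unit.  When the first context of the unit is activated, translation
 * is enabled unless rmrr_init is set.  The returned ctx is referenced
 * and must be released with dmar_free_ctx().
 *
 * A minimal caller sketch (hypothetical, not taken from this file;
 * the rid computation and error handling are assumptions):
 *
 *	struct dmar_ctx *ctx;
 *
 *	ctx = dmar_get_ctx_for_dev(dmar, child, pci_get_rid(child),
 *	    false, false);
 *	if (ctx == NULL)
 *		return (ENXIO);
 *	... hand ctx->ctx_tag to the busdma consumer ...
 *	dmar_free_ctx(ctx);
 */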
struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_domain *domain, *domain1;
	struct dmar_ctx *ctx, *ctx1;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int bus, slot, func, error;
	bool enable;

	bus = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	enable = false;
	TD_PREP_PINNED_ASSERT;
	DMAR_LOCK(dmar);
	ctx = dmar_find_ctx_locked(dmar, rid);
	error = 0;
	if (ctx == NULL) {
		/*
		 * Perform the allocations which require sleep or have
		 * a higher chance to succeed if sleep is allowed.
		 */
		DMAR_UNLOCK(dmar);
		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
		domain1 = dmar_domain_alloc(dmar, id_mapped);
		if (domain1 == NULL) {
			TD_PINNED_ASSERT;
			return (NULL);
		}
		error = domain_init_rmrr(domain1, dev);
		if (error != 0) {
			dmar_domain_destroy(domain1);
			TD_PINNED_ASSERT;
			return (NULL);
		}
		ctx1 = dmar_ctx_alloc(domain1, rid);
		ctxp = dmar_map_ctx_entry(ctx1, &sf);
		DMAR_LOCK(dmar);

		/*
		 * Recheck the contexts; another thread might have
		 * already allocated the needed one.
		 */
		ctx = dmar_find_ctx_locked(dmar, rid);
		if (ctx == NULL) {
			domain = domain1;
			ctx = ctx1;
			dmar_ctx_link(ctx);
			ctx->ctx_tag.owner = dev;
			ctx_tag_init(ctx, dev);

			/*
			 * This is the first activated context for the
			 * DMAR unit.  Enable the translation after
			 * everything is set up.
			 */
			if (LIST_EMPTY(&dmar->domains))
				enable = true;
			LIST_INSERT_HEAD(&dmar->domains, domain, link);
			ctx_id_entry_init(ctx, ctxp, false);
			device_printf(dev,
			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
			    "agaw %d %s-mapped\n",
			    dmar->unit, dmar->segment, bus, slot,
			    func, rid, domain->domain, domain->mgaw,
			    domain->agaw, id_mapped ? "id" : "re");
		} else {
			/* Nothing needs to be done to destroy ctx1. */
			dmar_domain_destroy(domain1);
			domain = ctx->domain;
			ctx->refs++; /* tag referenced us */
		}
		dmar_unmap_pgtbl(sf);
	} else {
		domain = ctx->domain;
		ctx->refs++; /* tag referenced us */
	}

	error = dmar_flush_for_ctx_entry(dmar, enable);
	if (error != 0) {
		dmar_free_ctx_locked(dmar, ctx);
		TD_PINNED_ASSERT;
		return (NULL);
	}

	/*
	 * The dmar lock was potentially dropped between the check for
	 * the empty context list and now.  Recheck the state of
	 * GCMD_TE to avoid issuing an unneeded command.
	 */
	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
		error = dmar_enable_translation(dmar);
		if (error != 0) {
			dmar_free_ctx_locked(dmar, ctx);
			TD_PINNED_ASSERT;
			return (NULL);
		}
	}
	DMAR_UNLOCK(dmar);
	TD_PINNED_ASSERT;
	return (ctx);
}

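/*
 * Descriptive summary (derived from the code below): move an existing
 * context to another domain on the same dmar unit, rewriting its
 * context entry in place and flushing the caches.  The old domain
 * loses the context's reference and is destroyed once it becomes
 * unreferenced.
 */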
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = ctx->domain;
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->dmar == dmar,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->dmar->unit, domain->dmar->unit));
	TD_PREP_PINNED_ASSERT;

	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	dmar_ctx_unlink(ctx);
	ctx->domain = domain;
	dmar_ctx_link(ctx);
	ctx_id_entry_init(ctx, ctxp, true);
	dmar_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If the flush failed, rolling back would not work either. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->unit, ctx->rid, old_domain->domain, domain->domain,
	    (domain->flags & DMAR_DOMAIN_IDMAP) != 0 ? "id" : "re");
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}

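/*
 * Descriptive summary (derived from the code below): drop a reference
 * on the domain.  Called with the dmar lock held; the lock is always
 * released.  When the last reference goes away, the domain is removed
 * from the unit's list, pending unloads are drained and the domain is
 * destroyed.
 */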
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->unit, domain, domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->unit, domain,
	    domain->refs, domain->ctx_cnt));

	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->flags & DMAR_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	taskqueue_drain(dmar->delayed_taskqueue, &domain->unload_task);
	dmar_domain_destroy(domain);
}

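/*
 * Descriptive summary (derived from the code below): drop a reference
 * on the context.  Called with the dmar lock held; the lock is always
 * released.  When the last reference is dropped, the context entry is
 * cleared, the caches are flushed, and the reference the context held
 * on its domain is released.
 */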
void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not the last one, only the dereference
	 * should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  Mapping the context entries page
	 * could require sleeping, so unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Another thread might have referenced the context; in that
	 * case, again, only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		dmar_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	dmar_unmap_pgtbl(sf);
	domain = ctx->domain;
	dmar_ctx_unlink(ctx);
	free(ctx, M_DMAR_CTX);
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}

void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;

	dmar = ctx->domain->dmar;
	DMAR_LOCK(dmar);
	dmar_free_ctx_locked(dmar, ctx);
}

/*
 * The dmar lock must be held; the found ctx is returned with the lock
 * still held.
 */
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
	struct dmar_domain *domain;
	struct dmar_ctx *ctx;

	DMAR_ASSERT_LOCKED(dmar);

	LIST_FOREACH(domain, &dmar->domains, link) {
		LIST_FOREACH(ctx, &domain->contexts, link) {
			if (ctx->rid == rid)
				return (ctx);
		}
	}
	return (NULL);
}

void
dmar_domain_free_entry(struct dmar_map_entry *entry, bool free)
{
	struct dmar_domain *domain;

	domain = entry->domain;
	DMAR_DOMAIN_LOCK(domain);
	if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0)
		dmar_gas_free_region(domain, entry);
	else
		dmar_gas_free_space(domain, entry);
	DMAR_DOMAIN_UNLOCK(domain);
	if (free)
		dmar_gas_free_entry(domain, entry);
	else
		entry->flags = 0;
}

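/*
 * Descriptive summary (derived from the code below): unload a single
 * map entry.  With queued invalidation, queue an IOTLB invalidation
 * and put the entry on the unit's TLB flush list for deferred
 * freeing; otherwise flush synchronously and free the entry right
 * away.
 */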
void
dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free)
{
	struct dmar_unit *unit;

	unit = entry->domain->dmar;
	if (unit->qi_enabled) {
		DMAR_LOCK(unit);
		dmar_qi_invalidate_locked(entry->domain, entry->start,
		    entry->end - entry->start, &entry->gseq);
		if (!free)
			entry->flags |= DMAR_MAP_ENTRY_QI_NF;
		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
		DMAR_UNLOCK(unit);
	} else {
		domain_flush_iotlb_sync(entry->domain, entry->start,
		    entry->end - entry->start);
		dmar_domain_free_entry(entry, free);
	}
}

static struct dmar_qi_genseq *
dmar_domain_unload_gseq(struct dmar_domain *domain,
    struct dmar_map_entry *entry, struct dmar_qi_genseq *gseq)
{

	if (TAILQ_NEXT(entry, dmamap_link) != NULL)
		return (NULL);
	if (domain->batch_no++ % dmar_batch_coalesce != 0)
		return (NULL);
	return (gseq);
}

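/*
 * Descriptive summary (derived from the code below): unload a batch
 * of map entries from the domain page table.  Without queued
 * invalidation each entry is flushed and freed synchronously.  With
 * it, an invalidation request is queued for every entry, a completion
 * sequence is recorded only as dictated by dmar_domain_unload_gseq()
 * above, and the entries are moved to the unit's TLB flush list for
 * deferred freeing.
 */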
void
dmar_domain_unload(struct dmar_domain *domain,
    struct dmar_map_entries_tailq *entries, bool cansleep)
{
	struct dmar_unit *unit;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_qi_genseq gseq;
	int error;

	unit = domain->dmar;

	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
		    ("not mapped entry %p %p", domain, entry));
		error = domain_unmap_buf(domain, entry->start, entry->end -
		    entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
		KASSERT(error == 0, ("unmap %p error %d", domain, error));
		if (!unit->qi_enabled) {
			domain_flush_iotlb_sync(domain, entry->start,
			    entry->end - entry->start);
			TAILQ_REMOVE(entries, entry, dmamap_link);
			dmar_domain_free_entry(entry, true);
		}
	}
	if (TAILQ_EMPTY(entries))
		return;

	KASSERT(unit->qi_enabled, ("loaded entry left"));
	DMAR_LOCK(unit);
	TAILQ_FOREACH(entry, entries, dmamap_link) {
		entry->gseq.gen = 0;
		entry->gseq.seq = 0;
		dmar_qi_invalidate_locked(domain, entry->start, entry->end -
		    entry->start, dmar_domain_unload_gseq(domain, entry,
		    &gseq));
	}
	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		entry->gseq = gseq;
		TAILQ_REMOVE(entries, entry, dmamap_link);
		TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
	}
	DMAR_UNLOCK(unit);
}

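/*
 * Descriptive summary (derived from the code below): taskqueue
 * handler that drains the domain's unload_entries list, repeatedly
 * swapping it onto a local queue and unloading the entries with
 * sleeping allowed.
 */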
static void
dmar_domain_unload_task(void *arg, int pending)
{
	struct dmar_domain *domain;
	struct dmar_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		DMAR_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries, dmar_map_entry,
		    dmamap_link);
		DMAR_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		dmar_domain_unload(domain, &entries, true);
	}
}