/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/sys/x86/iommu/busdma_dmar.c 284869 2015-06-26 07:01:29Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

/*
 * busdma_dmar.c, the implementation of the busdma(9) interface using
 * DMAR units from Intel VT-d.
 */

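/*
 * Check whether the tunable hw.busdma.pci<d>.<b>.<s>.<f>.bounce is
 * set for the given PCI device; its mere presence requests that the
 * device bypass DMAR translation and use bounce (identity) mapping
 * instead.
 */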
static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (false);
	freeenv(env);
	return (true);
}

/*
 * Given original device, find the requester ID that will be seen by
 * the DMAR unit and used for page table lookup.  PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device.  In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either DMAR or
 * bounce mapping.
 */
device_t
dmar_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("dmar_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the DMAR unit.
			 * Check whether the bridge above it is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

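/*
 * Find or create the DMAR context for the requester of the given
 * device.  If the administrator disabled translation for the device,
 * the context is marked disabled and NULL is returned, while other
 * devices behind the same DMAR unit keep their translation.
 */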
struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = dmar_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the whole DMAR unit, because
	 * other devices on the same unit may still require
	 * translation.  Instead, provide an identity mapping in the
	 * context for this device.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = dmar_get_ctx_for_dev(dmar, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release
		 * the later refs.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

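/*
 * Bus glue: return the busdma tag that covers the child device,
 * creating the DMAR context on first use.  Returns NULL when the
 * child is outside the scope of any DMAR unit or when DMA
 * translation is disabled for that unit, so that the caller falls
 * back to the regular busdma implementation.
 */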
bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child);
	/* Not in scope of any DMAR? */
	if (dmar == NULL)
		return (NULL);
	if (!dmar->dma_enabled)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

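/*
 * busdma tag_create method.  The common fields are filled in by
 * common_bus_dma_tag_create(); the DMAR context and owner device are
 * inherited from the parent tag.
 */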
static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

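/*
 * busdma tag_destroy method.  Drops a reference on the tag and walks
 * up the parent chain as references reach zero; destroying the
 * context-owned root tag also releases the context.  Fails with
 * EBUSY while maps created from the tag still exist.
 */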
static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

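/*
 * busdma map_create method.  Allocates the DMAR map and, on first
 * use of the tag, its segments array.
 */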
static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

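/*
 * busdma map_destroy method.  A map that still has entries mapped
 * into the domain (i.e. is still loaded) returns EBUSY.
 */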
static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_domain *domain;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		DMAR_DOMAIN_LOCK(domain);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		DMAR_DOMAIN_UNLOCK(domain);
		free(map, M_DMAR_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

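/*
 * busdma mem_alloc method.  Small allocations with default memory
 * attributes come from malloc(9), everything else from
 * kmem_alloc_attr(); the map flags record which allocator was used
 * so that dmar_bus_dmamem_free() can release the memory properly.
 */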
static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr(kernel_arena,
		    tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
		    attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

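/*
 * busdma mem_free method.  Releases the memory with the allocator
 * recorded in the map flags and destroys the map.
 */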
static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non alloced map %p", map));
		kmem_free(kernel_arena, (vm_offset_t)vaddr,
		    tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

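/*
 * Map the pages backing the buffer into the domain address space and
 * fill in the segment array.  Every entry created here is linked
 * both onto the map and onto the caller-provided unroll list so that
 * a failed load can be rolled back.
 */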
static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		error = dmar_gas_map(domain, &tag->common, size, offset,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(dmar_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		DMAR_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_DOMAIN_UNLOCK(domain);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= DMAR_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

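/*
 * Common load path for all the load methods.  On failure the entries
 * created so far are handed to the unload task, since busdma does
 * not allow partial loads.  An ENOMEM in a non-sleepable context
 * without BUS_DMA_NOWAIT is turned into EINPROGRESS and the load is
 * retried from the delayed taskqueue.
 */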
static int
dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all the work done.
		 */
		DMAR_DOMAIN_LOCK(domain);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than the ones created
			 * during the failed run could have been
			 * inserted there in the meantime, since we
			 * own the ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
			    dmamap_link);
		}
		DMAR_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->dmar->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		dmar_bus_schedule_dmamap(domain->dmar, map);
	return (error);
}

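/* busdma load_ma method: load from an array of vm_page_t directly. */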
static int
dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

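/*
 * busdma load_phys method: build a temporary vm_page array covering
 * the physical range and feed it to the common load path.
 */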
static int
dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma;
	vm_paddr_t pstart, pend;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	for (i = 0; i < ma_cnt; i++)
		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	return (error);
}

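/*
 * busdma load_buffer method: translate a kernel or user virtual
 * buffer into a vm_page array and feed it to the common load path.
 * While dumping, fake pages are used instead of PHYS_TO_VM_PAGE(),
 * see the comment below.
 */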
static int
dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	if (dumping) {
		/*
		 * If dumping, do not attempt to call
		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL,
		 * but the vm_page returned might not be initialized,
		 * e.g. for the kernel itself.
		 */
		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
		if (fma == NULL) {
			free(ma, M_DEVBUF);
			return (ENOMEM);
		}
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			paddr = pmap_kextract(pstart);
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	} else {
		fma = NULL;
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(pstart);
			else
				paddr = pmap_extract(pmap, pstart);
			ma[i] = PHYS_TO_VM_PAGE(paddr);
			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
			    ma[i]));
		}
	}
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

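/*
 * busdma map_waitok method: remember the memory descriptor and
 * callback so that a load deferred with EINPROGRESS can be replayed
 * from the taskqueue.
 */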
static void
dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_dmar *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_dmar *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_dmar *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

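/*
 * busdma map_complete method.  When the load finished in the delayed
 * context, re-take the driver lock before the callback is invoked.
 */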
static bus_dma_segment_t *
dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the DMAR to perform the
 * actual unload, i.e. the removal of the map entries from the page
 * tables, from the delayed context on i386, since mapping a page
 * table page might require a sleep to be successful.  The unfortunate
 * consequence is that DMA requests can still be served for some time
 * after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
#if defined(__amd64__)
	struct dmar_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	DMAR_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
	DMAR_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->dmar->delayed_taskqueue,
	    &domain->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	DMAR_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	DMAR_DOMAIN_UNLOCK(domain);
	THREAD_NO_SLEEPING();
	dmar_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
#endif
}

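/*
 * DMAR busdma never bounces, and DMA on x86 is cache-coherent, so
 * there is nothing to do for sync.
 */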
static void
dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_dmar_impl = {
	.tag_create = dmar_bus_dma_tag_create,
	.tag_destroy = dmar_bus_dma_tag_destroy,
	.map_create = dmar_bus_dmamap_create,
	.map_destroy = dmar_bus_dmamap_destroy,
	.mem_alloc = dmar_bus_dmamem_alloc,
	.mem_free = dmar_bus_dmamem_free,
	.load_phys = dmar_bus_dmamap_load_phys,
	.load_buffer = dmar_bus_dmamap_load_buffer,
	.load_ma = dmar_bus_dmamap_load_ma,
	.map_waitok = dmar_bus_dmamap_waitok,
	.map_complete = dmar_bus_dmamap_complete,
	.map_unload = dmar_bus_dmamap_unload,
	.map_sync = dmar_bus_dmamap_sync
};

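/*
 * Taskqueue handler: replay the deferred map loads in a context that
 * may sleep.  dmar_bus_dmamap_complete() re-takes the driver lock
 * for the callback; balance it here once the load is done.
 */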
static void
dmar_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_unit *unit;

	unit = arg;
	DMAR_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		DMAR_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		DMAR_LOCK(unit);
	}
	DMAR_UNLOCK(unit);
}

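/*
 * Queue a map whose load must be retried from a sleepable context
 * and kick the per-unit taskqueue.
 */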
static void
dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
{

	map->locked = false;
	DMAR_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	DMAR_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

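/*
 * Per-unit busdma initialization: honor the hw.dmar.dma tunable and
 * create the taskqueue used for delayed map loads and unloads.
 */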
int
dmar_init_busdma(struct dmar_unit *unit)
{

	unit->dma_enabled = 1;
	TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "dmar%d busdma taskq", unit->unit);
	return (0);
}

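/* Tear down the per-unit busdma state set up by dmar_init_busdma(). */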
void
dmar_fini_busdma(struct dmar_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}