/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.3/sys/x86/iommu/busdma_dmar.c 284021 2015-06-05 08:36:25Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

/*
 * busdma_dmar.c, the implementation of the busdma(9) interface using
 * DMAR units from Intel VT-d.
 */
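
/*
 * Illustrative sketch (not part of the original file): a driver never
 * calls into this implementation directly.  It obtains its parent tag
 * via bus_get_dma_tag(9); when the device is covered by a DMAR unit,
 * that parent is the context tag produced by dmar_get_dma_tag()
 * below, so an ordinary tag creation is transparently backed by DMAR
 * translation.  Here dev, error and sc->dma_tag are the usual
 * hypothetical driver-local names:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    DFLTPHYS, 1, DFLTPHYS, 0, NULL, NULL, &sc->dma_tag);
 */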

static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
	    domain, bus, slot, func);
	env = getenv(str);
	if (env == NULL)
		return (false);
	freeenv(env);
	return (true);
}
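
/*
 * Usage sketch (the device location below is a made-up example): to
 * force bounce (identity) mapping for the device at pci0:0:31:2, set
 * the matching tunable from loader.conf(5):
 *
 *	hw.busdma.pci0.0.31.2.bounce="1"
 *
 * Only the presence of the variable matters; its value is not
 * examined.
 */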

/*
 * Given the original device, find the requester ID that will be seen
 * by the DMAR unit and used for the page table lookup.  PCI bridges
 * may take ownership of transactions from downstream devices, so the
 * requester ID may differ from the BSF of the target device.  In that
 * case, all devices downstream of the bridge must share a single
 * mapping domain, and must collectively be assigned to use either
 * DMAR or bounce mapping.
 */
static device_t
dmar_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("dmar_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);
		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the DMAR unit.
			 * Check whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it is PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}
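
/*
 * Worked example (hypothetical topology): a conventional PCI device
 * at pci0:5:3:0 behind a PCIe->PCI bridge whose secondary bus is 5.
 * The device reports no express capability while the bridge does, so
 * the loop above takes the bridge_is_pcie branch: the bridge becomes
 * the requester and the assumed requester ID is PCI_RID(5, 0, 0),
 * i.e. the secondary bus with slot and function zero, not the
 * device's own RID.
 */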

struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = dmar_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the DMAR unit, because other devices handled
	 * by the same DMAR may still require translation.  Instead,
	 * provide the identity mapping for the device context.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = dmar_get_ctx(dmar, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child);
	/* Not in the scope of any DMAR? */
	if (dmar == NULL)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		DMAR_CTX_LOCK(tag->ctx);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_CTX_UNLOCK(tag->ctx);
			return (EBUSY);
		}
		DMAR_CTX_UNLOCK(tag->ctx);
		free(map, M_DMAR_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr(kernel_arena,
		    tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
		    attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non alloced map %p", map));
		kmem_free(kernel_arena, (vm_offset_t)vaddr,
		    tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		error = dmar_gas_map(ctx, &tag->common, size, offset,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(dmar_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		DMAR_CTX_LOCK(ctx);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_CTX_UNLOCK(ctx);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= DMAR_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}
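
/*
 * Worked example for the loop above (assumed values, 4K pages,
 * allocation not split): with offset = 200, buflen = 6000 and
 * maxsegsz = 4096, the first pass takes buflen1 = 4096, asks
 * dmar_gas_map() for round_page(200 + 4096) = 8192 bytes of bus
 * address space and emits the segment { entry->start + 200, 4096 }.
 * It then advances idx by OFF_TO_IDX(trunc_page(4296)) = 1 page,
 * recomputes offset = 4296 & DMAR_PAGE_MASK = 200, and leaves
 * buflen = 1904 for the second, final segment.
 */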

static int
dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all the work done.
		 */
		DMAR_CTX_LOCK(ctx);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than the ones we created
			 * during the failed run could have been
			 * inserted there in the meantime, since we
			 * own the ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&ctx->unload_entries, entry,
			    dmamap_link);
		}
		DMAR_CTX_UNLOCK(ctx);
		taskqueue_enqueue(ctx->dmar->delayed_taskqueue,
		    &ctx->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		dmar_bus_schedule_dmamap(ctx->dmar, map);
	return (error);
}

static int
dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma;
	vm_paddr_t pstart, pend;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	for (i = 0; i < ma_cnt; i++)
		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	return (error);
}

static int
dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	if (dumping) {
		/*
		 * If dumping, do not attempt to call
		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL
		 * but the returned vm_page might not be initialized,
		 * e.g. for the pages mapping the kernel itself.
		 */
		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
		if (fma == NULL) {
			free(ma, M_DEVBUF);
			return (ENOMEM);
		}
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			paddr = pmap_kextract(pstart);
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	} else {
		fma = NULL;
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(pstart);
			else
				paddr = pmap_extract(pmap, pstart);
			ma[i] = PHYS_TO_VM_PAGE(paddr);
			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
			    ma[i]));
		}
	}
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_dmar *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_dmar *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_dmar *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the DMAR driver to perform
 * the actual unload, consisting of unmapping the map entries from the
 * page tables, from the delayed context on i386, since mapping the
 * page table pages might require a sleep to be successful.  The
 * unfortunate consequence is that DMA requests can still be served
 * for some time after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
#if defined(__amd64__)
	struct dmar_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	ctx = tag->ctx;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	THREAD_NO_SLEEPING();
	dmar_ctx_unload(ctx, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
#endif
}

static void
dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_dmar_impl = {
	.tag_create = dmar_bus_dma_tag_create,
	.tag_destroy = dmar_bus_dma_tag_destroy,
	.map_create = dmar_bus_dmamap_create,
	.map_destroy = dmar_bus_dmamap_destroy,
	.mem_alloc = dmar_bus_dmamem_alloc,
	.mem_free = dmar_bus_dmamem_free,
	.load_phys = dmar_bus_dmamap_load_phys,
	.load_buffer = dmar_bus_dmamap_load_buffer,
	.load_ma = dmar_bus_dmamap_load_ma,
	.map_waitok = dmar_bus_dmamap_waitok,
	.map_complete = dmar_bus_dmamap_complete,
	.map_unload = dmar_bus_dmamap_unload,
	.map_sync = dmar_bus_dmamap_sync
};

static void
dmar_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_unit *unit;

	unit = arg;
	DMAR_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		DMAR_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		DMAR_LOCK(unit);
	}
	DMAR_UNLOCK(unit);
}

static void
dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
{

	map->locked = false;
	DMAR_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	DMAR_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

int
dmar_init_busdma(struct dmar_unit *unit)
{

	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "dmar%d busdma taskq", unit->unit);
	return (0);
}

void
dmar_fini_busdma(struct dmar_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}