/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

/*
 * busdma_dmar.c, the implementation of the busdma(9) interface using
 * DMAR units from Intel VT-d.
 */

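/*
 * Check the loader tunables to decide whether the given device should
 * use bounce busdma instead of DMAR translation.  The per-device
 * hw.busdma.pciDOMAIN.BUS.SLOT.FUNC tunable overrides the global
 * hw.busdma.default setting; both accept the values "bounce" and
 * "dmar".
 */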
static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char dmar_str[] = "dmar";

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the DMAR unit and used for the page table lookup.  PCI bridges
 * may take ownership of transactions from downstream devices, so the
 * requester ID may not be the same as the BSF of the target device.
 * In those cases, all devices downstream of the bridge must share a
 * single mapping domain, and must collectively be assigned to use
 * either DMAR or bounce mapping.
 */
device_t
dmar_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("dmar_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the DMAR unit.
			 * Check whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, where
				 * the bridge will only take ownership
				 * of requests in some cases.  We
				 * should provide context entries with
				 * the same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

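/*
 * Create or look up the DMAR context for the given device, keyed by
 * the requester ID seen by the DMAR unit.  If the user disabled
 * translation for the device, an identity-mapped context is still
 * instantiated, but NULL is returned so that the regular busdma
 * implementation is used for the device.
 */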
struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = dmar_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the whole DMAR unit, due to the
	 * possibility of other devices on the same unit still
	 * requiring translation.  Instead, provide the identity
	 * mapping for the device context.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = dmar_get_ctx_for_dev(dmar, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release
		 * the later refs.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

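/*
 * Return the DMAR-backed DMA tag for the child device, or NULL if the
 * device is outside the scope of any DMAR unit or the unit has DMA
 * translation disabled.
 */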
bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child, bootverbose);
	/* Not in scope of any DMAR? */
	if (dmar == NULL)
		return (NULL);
	if (!dmar->dma_enabled)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

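/*
 * Request buswide busdma for the PCI bus of the given device, so that
 * all functions on that bus share a single DMAR context.  The device
 * must sit at slot 0, function 0; otherwise the request is rejected
 * and false is returned.
 */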
bool
bus_dma_dmar_set_buswide(device_t dev)
{
	struct dmar_unit *dmar;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	dmar = dmar_find(dev, bootverbose);
	if (dmar == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "dmar%d pci%d:%d:%d requested buswide busdma\n",
			    dmar->unit, busno, slot, func);
		}
		return (false);
	}
	dmar_set_buswide_ctx(dmar, busno);
	return (true);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

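/*
 * Create a busdma tag backed by the DMAR.  Most of the work is done
 * by the common x86 code; the DMAR context and owner are inherited
 * from the parent tag.
 */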
static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
dmar_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free_domain(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
dmar_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

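/*
 * Allocate a DMA map and, on first use of the tag, the segments
 * array.  Both allocations prefer the NUMA domain of the tag.
 */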
static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc_domainset(sizeof(*map), M_DMAR_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free_domain(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_domain *domain;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		DMAR_DOMAIN_LOCK(domain);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		DMAR_DOMAIN_UNLOCK(domain);
		free_domain(map, M_DMAR_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

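/*
 * Allocate memory suitable for the tag.  Small default-attribute
 * allocations are served by malloc(9); everything else goes through
 * kmem_alloc_attr_domainset().  The map records which allocator was
 * used so that dmar_bus_dmamem_free() can release it correctly.
 */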
static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free_domain(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non alloced map %p", map));
		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

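/*
 * Map the buffer, described by the array of vm_page_t, into the
 * domain's I/O address space, splitting it into busdma segments that
 * satisfy the tag's alignment, boundary, and segment size
 * constraints.  Each created map entry is linked both to the map and
 * to unroll_list so that the caller can revert the work on failure.
 */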
static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		error = dmar_gas_map(domain, &tag->common, size, offset,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(dmar_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		DMAR_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_DOMAIN_UNLOCK(domain);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= DMAR_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

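/*
 * Wrapper around dmar_bus_dmamap_load_something1() that unrolls all
 * created entries if the load fails, since busdma does not allow a
 * partial load to be reported.  If the failure is caused by the
 * inability to sleep in the current context, the load is rescheduled
 * onto the delayed taskqueue and EINPROGRESS is returned.
 */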
static int
dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report
		 * a partial buffer load, so unfortunately we have to
		 * revert all the work done.
		 */
		DMAR_DOMAIN_LOCK(domain);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than those we created
			 * during the failed run could have been
			 * inserted there in between, since we own the
			 * ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
			    dmamap_link);
		}
		DMAR_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->dmar->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		dmar_bus_schedule_dmamap(domain->dmar, map);
	return (error);
}

static int
dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

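/*
 * Load a physically contiguous buffer by building a temporary
 * vm_page_t array for its pages and handing it to the common load
 * path.
 */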
static int
dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma;
	vm_paddr_t pstart, pend;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	for (i = 0; i < ma_cnt; i++)
		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	return (error);
}

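/*
 * Load a kernel or user virtual buffer.  The backing pages are looked
 * up through the pmap; while dumping, fake pages are constructed
 * instead, since the real vm_page for a kernel page might not be
 * initialized.
 */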
static int
dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	if (dumping) {
		/*
		 * If dumping, do not attempt to call
		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL
		 * but the vm_page returned might not be initialized,
		 * e.g. for the kernel itself.
		 */
		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
		if (fma == NULL) {
			free(ma, M_DEVBUF);
			return (ENOMEM);
		}
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			paddr = pmap_kextract(pstart);
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	} else {
		fma = NULL;
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(pstart);
			else
				paddr = pmap_extract(pmap, pstart);
			ma[i] = PHYS_TO_VM_PAGE(paddr);
			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
			    ma[i]));
		}
	}
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_dmar *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_dmar *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_dmar *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the dmar to perform the
 * actual unload, consisting of unmapping the map entries from the
 * page tables, from the delayed context on i386, since mapping a page
 * table page might require a sleep to be successful.  The unfortunate
 * consequence is that DMA requests can still be served for some time
 * after the bus_dmamap_unload() call returns.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
#if defined(__amd64__)
	struct dmar_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	DMAR_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
	DMAR_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->dmar->delayed_taskqueue,
	    &domain->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	DMAR_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	DMAR_DOMAIN_UNLOCK(domain);
	THREAD_NO_SLEEPING();
	dmar_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
#endif
}

static void
dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_dmar_impl = {
	.tag_create = dmar_bus_dma_tag_create,
	.tag_destroy = dmar_bus_dma_tag_destroy,
	.tag_set_domain = dmar_bus_dma_tag_set_domain,
	.id_mapped = dmar_bus_dma_id_mapped,
	.map_create = dmar_bus_dmamap_create,
	.map_destroy = dmar_bus_dmamap_destroy,
	.mem_alloc = dmar_bus_dmamem_alloc,
	.mem_free = dmar_bus_dmamem_free,
	.load_phys = dmar_bus_dmamap_load_phys,
	.load_buffer = dmar_bus_dmamap_load_buffer,
	.load_ma = dmar_bus_dmamap_load_ma,
	.map_waitok = dmar_bus_dmamap_waitok,
	.map_complete = dmar_bus_dmamap_complete,
	.map_unload = dmar_bus_dmamap_unload,
	.map_sync = dmar_bus_dmamap_sync,
};

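/*
 * Taskqueue handler that retries delayed map loads in a sleepable
 * context and invokes the driver callbacks recorded by
 * dmar_bus_dmamap_waitok().
 */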
static void
dmar_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_unit *unit;

	unit = arg;
	DMAR_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		DMAR_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		DMAR_LOCK(unit);
	}
	DMAR_UNLOCK(unit);
}

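/*
 * Queue the map for a delayed load by the unit's taskqueue.
 */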
static void
dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
{

	map->locked = false;
	DMAR_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	DMAR_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

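/*
 * Initialize the busdma state of the DMAR unit: the delayed-load map
 * list and the taskqueue serving it.  Translation for the unit can be
 * turned off with the hw.dmar.dma tunable.
 */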
int
dmar_init_busdma(struct dmar_unit *unit)
{

	unit->dma_enabled = 1;
	TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "dmar%d busdma taskq", unit->unit);
	return (0);
}

void
dmar_fini_busdma(struct dmar_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

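/*
 * Install an identity mapping for the physical range [start, start +
 * length) into the domain of the given map, so that the device can
 * address that memory by its physical address.  The range must be
 * page-aligned.  For tags not backed by the DMAR the function is a
 * no-op returning 0.
 */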
int
bus_dma_dmar_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
	struct dmar_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_dmar_impl)
		return (0);

	tag = (struct bus_dma_tag_dmar *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_dmar *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = dmar_gas_alloc_entry(domain, waitok ? DMAR_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		dmar_gas_free_entry(domain, entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = dmar_gas_map_region(domain, entry, DMAR_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : DMAR_MAP_ENTRY_WRITE),
	    waitok ? DMAR_GM_CANWAIT : 0, ma);
	if (error == 0) {
		DMAR_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_DOMAIN_UNLOCK(domain);
	} else {
		dmar_domain_unload_entry(entry, true);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}