1/*	$OpenBSD: iommu.c,v 1.83 2023/10/18 14:24:29 jan Exp $	*/
2/*	$NetBSD: iommu.c,v 1.47 2002/02/08 20:03:45 eeh Exp $	*/
3
4/*
5 * Copyright (c) 2003 Henric Jungheim
6 * Copyright (c) 2001, 2002 Eduardo Horvath
7 * Copyright (c) 1999, 2000 Matthew R. Green
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
35 * UltraSPARC IOMMU support; used by both the sbus and pci code.
36 */
37#include <sys/param.h>
38#include <sys/extent.h>
39#include <sys/malloc.h>
40#include <sys/systm.h>
41#include <sys/proc.h>
42#include <sys/device.h>
43#include <sys/mbuf.h>
44
45#include <uvm/uvm_extern.h>
46
47#include <machine/bus.h>
48#include <sparc64/sparc64/cache.h>
49#include <sparc64/dev/iommureg.h>
50#include <sparc64/dev/iommuvar.h>
51
52#include <machine/autoconf.h>
53#include <machine/cpu.h>
54
55#ifdef DDB
56#include <machine/db_machdep.h>
57#include <ddb/db_sym.h>
58#include <ddb/db_extern.h>
59#endif
60
61#ifdef DEBUG
62#define IDB_BUSDMA	0x1
63#define IDB_IOMMU	0x2
64#define IDB_INFO	0x4
65#define IDB_SYNC	0x8
66#define IDB_XXX		0x10
67#define IDB_PRINT_MAP	0x20
68#define IDB_BREAK	0x40
69int iommudebug = IDB_INFO;
70#define DPRINTF(l, s)   do { if (iommudebug & l) printf s; } while (0)
71#else
72#define DPRINTF(l, s)
73#endif
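
/*
 * Any combination of the IDB_* bits above may be or'ed into iommudebug
 * (at compile time or from ddb) to enable the matching DPRINTF() output.
 */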
74
75void iommu_enter(struct iommu_state *, struct strbuf_ctl *, bus_addr_t,
76    paddr_t, int);
77void iommu_remove(struct iommu_state *, struct strbuf_ctl *, bus_addr_t);
78int iommu_dvmamap_sync_range(struct strbuf_ctl*, bus_addr_t, bus_size_t);
79int iommu_strbuf_flush_done(struct iommu_map_state *);
80int iommu_dvmamap_load_seg(bus_dma_tag_t, struct iommu_state *,
81    bus_dmamap_t, bus_dma_segment_t *, int, int, bus_size_t, bus_size_t);
82int iommu_dvmamap_load_mlist(bus_dma_tag_t, struct iommu_state *,
83    bus_dmamap_t, struct pglist *, int, bus_size_t, bus_size_t);
84int iommu_dvmamap_validate_map(bus_dma_tag_t, struct iommu_state *,
85    bus_dmamap_t);
86void iommu_dvmamap_print_map(bus_dma_tag_t, struct iommu_state *,
87    bus_dmamap_t);
88int iommu_dvmamap_append_range(bus_dma_tag_t, bus_dmamap_t, paddr_t,
89    bus_size_t, int, bus_size_t);
90int iommu_dvmamap_insert(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
91    bus_size_t, int, bus_size_t);
92int64_t iommu_tsb_entry(struct iommu_state *, bus_addr_t);
93void strbuf_reset(struct strbuf_ctl *);
94int iommu_iomap_insert_page(struct iommu_map_state *, paddr_t);
95bus_addr_t iommu_iomap_translate(struct iommu_map_state *, paddr_t);
96void iommu_iomap_load_map(struct iommu_state *, struct iommu_map_state *,
97    bus_addr_t, int);
98void iommu_iomap_unload_map(struct iommu_state *, struct iommu_map_state *);
99struct iommu_map_state *iommu_iomap_create(int);
100void iommu_iomap_destroy(struct iommu_map_state *);
101void iommu_iomap_clear_pages(struct iommu_map_state *);
102void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
103    bus_addr_t, bus_size_t, int);
104
105void iommu_hw_enable(struct iommu_state *);
106
107const struct iommu_hw iommu_hw_default = {
108	.ihw_enable	= iommu_hw_enable,
109
110	.ihw_dvma_pa	= IOTTE_PAMASK,
111
112	.ihw_bypass	= 0x3fffUL << 50,
113	.ihw_bypass_nc	= 0,
114	.ihw_bypass_ro	= 0,
115};
116
117void
118iommu_hw_enable(struct iommu_state *is)
119{
120	IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
121	IOMMUREG_WRITE(is, iommu_cr, IOMMUCR_EN | (is->is_tsbsize << 16));
122}
123
124/*
125 * Initiate an STC entry flush.
126 */
127static inline void
128iommu_strbuf_flush(struct strbuf_ctl *sb, bus_addr_t va)
129{
130#ifdef DEBUG
131	if (sb->sb_flush == NULL) {
132		printf("iommu_strbuf_flush: attempting to flush w/o STC\n");
133		return;
134	}
135#endif
136
137	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
138	    STRBUFREG(strbuf_pgflush), va);
139}
140
141/*
142 * initialise the UltraSPARC IOMMU (SBus or PCI):
143 *	- allocate and setup the iotsb.
144 *	- enable the IOMMU
145 *	- initialise the streaming buffers (if they exist)
146 *	- create a private DVMA map.
147 */
148void
149iommu_init(char *name, const struct iommu_hw *ihw, struct iommu_state *is,
150    int tsbsize, u_int32_t iovabase)
151{
152	psize_t size;
153	vaddr_t va;
154	paddr_t pa;
155	struct vm_page *m;
156	struct pglist mlist;
157
158	/*
159	 * Setup the iommu.
160	 *
161	 * The sun4u iommu is part of the SBus or PCI controller so we will
	 * deal with it here.
163	 *
164	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
165	 * 0xffffe000, but the starting address depends on the size of the
166	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
167	 * entry is 8 bytes.  The start of the map can be calculated by
168	 * (0xffffe000 << (8 + is->is_tsbsize)).
169	 *
170	 * But sabre and hummingbird use a different scheme that seems to
171	 * be hard-wired, so we read the start and size from the PROM and
172	 * just use those values.
173	 */
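
	/*
	 * A worked example, assuming the usual 8KB sparc64 PAGE_SIZE:
	 * tsbsize 3 gives 1024 * 2^3 = 8192 TSB entries (PAGE_SIZE <<
	 * is->is_tsbsize = 64KB of table, allocated below), and since
	 * each entry maps one 8KB page the DVMA window spans
	 * 8192 * 8KB = 64MB, running up to IOTSB_VEND.
	 */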
174
175	is->is_hw = ihw;
176
177	is->is_tsbsize = tsbsize;
178	if (iovabase == (u_int32_t)-1) {
179		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
180		is->is_dvmaend = IOTSB_VEND;
181	} else {
182		is->is_dvmabase = iovabase;
183		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize) - 1;
184	}
185
186	/*
187	 * Allocate memory for I/O pagetables.  They need to be physically
188	 * contiguous.
189	 */
190
191	size = PAGE_SIZE << is->is_tsbsize;
192	TAILQ_INIT(&mlist);
193	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
194	    (paddr_t)PAGE_SIZE, (paddr_t)0, &mlist, 1, UVM_PLA_NOWAIT) != 0)
195		panic("iommu_init: no memory");
196
197	va = (vaddr_t)km_alloc(size, &kv_any, &kp_none, &kd_nowait);
198	if (va == 0)
199		panic("iommu_init: no memory");
200	is->is_tsb = (int64_t *)va;
201
202	m = TAILQ_FIRST(&mlist);
203	is->is_ptsb = VM_PAGE_TO_PHYS(m);
204
205	/* Map the pages */
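	/*
	 * The TSB is mapped PMAP_NVC (not virtually cacheable) because
	 * the IOMMU will fetch TTEs from it by physical address.
	 */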
206	for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
207		pa = VM_PAGE_TO_PHYS(m);
208		pmap_enter(pmap_kernel(), va, pa | PMAP_NVC,
209		    PROT_READ | PROT_WRITE,
210		    PROT_READ | PROT_WRITE | PMAP_WIRED);
211		va += PAGE_SIZE;
212	}
213	pmap_update(pmap_kernel());
214	memset(is->is_tsb, 0, size);
215
216	TAILQ_INIT(&mlist);
217	if (uvm_pglistalloc(PAGE_SIZE, 0, -1, PAGE_SIZE, 0, &mlist, 1,
218	    UVM_PLA_NOWAIT | UVM_PLA_ZERO) != 0)
219		panic("%s: no memory", __func__);
220	m = TAILQ_FIRST(&mlist);
221	is->is_scratch = VM_PAGE_TO_PHYS(m);
222
223#ifdef DEBUG
224	if (iommudebug & IDB_INFO) {
225		/* Probe the iommu */
226		/* The address or contents of the regs...? */
227		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
228		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
229			IOMMUREG(iommu_cr),
230		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
231			IOMMUREG(iommu_tsb),
232		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
233			IOMMUREG(iommu_flush));
234		printf("iommu cr=%llx tsb=%llx\n",
235		    IOMMUREG_READ(is, iommu_cr),
236		    IOMMUREG_READ(is, iommu_tsb));
237		printf("TSB base %p phys %llx\n",
238		    (void *)is->is_tsb, (unsigned long long)is->is_ptsb);
239		delay(1000000); /* 1 s */
240	}
241#endif
242
243	/*
	 * Now that all the hardware is working, we need to allocate a
	 * dvma map.
245	 */
246	printf("dvma map %x-%x", is->is_dvmabase, is->is_dvmaend);
247#ifdef DEBUG
248	printf(", iotdb %llx-%llx",
249	    (unsigned long long)is->is_ptsb,
250	    (unsigned long long)(is->is_ptsb + size));
251#endif
252	is->is_dvmamap = extent_create(name,
253	    is->is_dvmabase, (u_long)is->is_dvmaend + 1,
254	    M_DEVBUF, NULL, 0, EX_NOCOALESCE);
255	mtx_init(&is->is_mtx, IPL_HIGH);
256
257	/*
258	 * Now actually start up the IOMMU.
259	 */
260	iommu_reset(is);
261	printf("\n");
262}
263
264/*
265 * Streaming buffers don't exist on the UltraSPARC IIi/e; we should have
266 * detected that already and disabled them.  If not, we will notice that
 * they aren't there when the STRBUF_EN bit does not remain set.
268 */
269void
270iommu_reset(struct iommu_state *is)
271{
272	int i;
273
274	(*is->is_hw->ihw_enable)(is);
275
276	for (i = 0; i < 2; ++i) {
277		struct strbuf_ctl *sb = is->is_sb[i];
278
279		if (sb == NULL)
280			continue;
281
282		sb->sb_iommu = is;
283		strbuf_reset(sb);
284
285		if (sb->sb_flush)
286			printf(", STC%d enabled", i);
287	}
288
289	if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE))
290		IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
291}
292
293/*
294 * Initialize one STC.
295 */
296void
297strbuf_reset(struct strbuf_ctl *sb)
298{
	if (sb->sb_flush == NULL)
300		return;
301
302	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
303	    STRBUFREG(strbuf_ctl), STRBUF_EN);
304
305	__membar("#Lookaside");
306
307	/* No streaming buffers? Disable them */
308	if (bus_space_read_8(sb->sb_bustag, sb->sb_sb,
309	    STRBUFREG(strbuf_ctl)) == 0) {
310		sb->sb_flush = NULL;
311	} else {
312		/*
313		 * locate the pa of the flush buffer
314		 */
315		if (pmap_extract(pmap_kernel(),
316		    (vaddr_t)sb->sb_flush, &sb->sb_flushpa) == FALSE)
317			sb->sb_flush = NULL;
318		mtx_init(&sb->sb_mtx, IPL_HIGH);
319	}
320}
321
322/*
323 * Add an entry to the IOMMU table.
324 *
325 * The entry is marked streaming if an STC was detected and
326 * the BUS_DMA_STREAMING flag is set.
327 */
328void
329iommu_enter(struct iommu_state *is, struct strbuf_ctl *sb, bus_addr_t va,
330    paddr_t pa, int flags)
331{
332	int64_t tte;
333	volatile int64_t *tte_ptr = &is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)];
334
335#ifdef DIAGNOSTIC
336	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
337		panic("iommu_enter: va %#lx not in DVMA space", va);
338
339	tte = *tte_ptr;
340
341	if (tte & IOTTE_V) {
342		printf("Overwriting valid tte entry (dva %lx pa %lx "
343		    "&tte %p tte %llx)\n", va, pa, tte_ptr, tte);
344		extent_print(is->is_dvmamap);
345		panic("IOMMU overwrite");
346	}
347#endif
348
349	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
350	    !(flags & BUS_DMA_NOCACHE), (flags & BUS_DMA_STREAMING));
351
352	DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
353	    (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
354
355	*tte_ptr = tte;
356
357	/*
358	 * Why bother to flush this va?  It should only be relevant for
359	 * V ==> V or V ==> non-V transitions.  The former is illegal and
360	 * the latter is never done here.  It is true that this provides
361	 * some protection against a misbehaving master using an address
	 * after it should.  The IOMMU documentation specifically warns
363	 * that the consequences of a simultaneous IOMMU flush and DVMA
364	 * access to the same address are undefined.  (By that argument,
365	 * the STC should probably be flushed as well.)   Note that if
366	 * a bus master keeps using a memory region after it has been
367	 * unmapped, the specific behavior of the IOMMU is likely to
368	 * be the least of our worries.
369	 */
370	IOMMUREG_WRITE(is, iommu_flush, va);
371
372	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
373	    va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
374	    (void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
375	    (u_long)tte));
376}
377
378/*
379 * Remove an entry from the IOMMU table.
380 *
381 * The entry is flushed from the STC if an STC is detected and the TSB
382 * entry has the IOTTE_STREAM flags set.  It should be impossible for
383 * the TSB entry to have this flag set without the BUS_DMA_STREAMING
384 * flag, but better to be safe.  (The IOMMU will be ignored as long
385 * as an STC entry exists.)
386 */
387void
388iommu_remove(struct iommu_state *is, struct strbuf_ctl *sb, bus_addr_t va)
389{
390	int64_t *tte_ptr = &is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)];
391	int64_t tte;
392
393#ifdef DIAGNOSTIC
394	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
395		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
396	if (va != trunc_page(va)) {
397		printf("iommu_remove: unaligned va: %lx\n", va);
398		va = trunc_page(va);
399	}
400#endif
401	tte = *tte_ptr;
402
403	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%llx]@%p\n",
404	    va, tte, tte_ptr));
405
406#ifdef DIAGNOSTIC
407	if ((tte & IOTTE_V) == 0) {
408		printf("Removing invalid tte entry (dva %lx &tte %p "
409		    "tte %llx)\n", va, tte_ptr, tte);
410		extent_print(is->is_dvmamap);
411		panic("IOMMU remove overwrite");
412	}
413#endif
414
415	*tte_ptr = tte & ~IOTTE_V;
416
417	/*
418	 * IO operations are strongly ordered WRT each other.  It is
419	 * unclear how they relate to normal memory accesses.
420	 */
421	__membar("#StoreStore");
422
423	IOMMUREG_WRITE(is, iommu_flush, va);
424
425	if (sb && (tte & IOTTE_STREAM))
426		iommu_strbuf_flush(sb, va);
427
428	/* Should we sync the iommu and stc here? */
429}
430
431/*
432 * Find the physical address of a DVMA address (debug routine).
433 */
434paddr_t
435iommu_extract(struct iommu_state *is, bus_addr_t dva)
436{
437	int64_t tte = 0;
438
439	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
440		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
441
442	return (tte & is->is_hw->ihw_dvma_pa);
443}
444
445/*
446 * Lookup a TSB entry for a given DVMA (debug routine).
447 */
448int64_t
449iommu_lookup_tte(struct iommu_state *is, bus_addr_t dva)
450{
451	int64_t tte = 0;
452
453	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
454		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
455
456	return (tte);
457}
458
459/*
460 * Lookup a TSB entry at a given physical address (debug routine).
461 */
462int64_t
463iommu_fetch_tte(struct iommu_state *is, paddr_t pa)
464{
465	int64_t tte = 0;
466
467	if (pa >= is->is_ptsb && pa < is->is_ptsb +
468	    (PAGE_SIZE << is->is_tsbsize))
469		tte = ldxa(pa, ASI_PHYS_CACHED);
470
471	return (tte);
472}
473
474/*
475 * Fetch a TSB entry with some sanity checking.
476 */
477int64_t
478iommu_tsb_entry(struct iommu_state *is, bus_addr_t dva)
479{
480	int64_t tte;
481
482	if (dva < is->is_dvmabase || dva > is->is_dvmaend)
483		panic("invalid dva: %llx", (long long)dva);
484
485	tte = is->is_tsb[IOTSBSLOT(dva,is->is_tsbsize)];
486
487	if ((tte & IOTTE_V) == 0)
488		panic("iommu_tsb_entry: invalid entry %lx", dva);
489
490	return (tte);
491}
492
493/*
494 * Initiate and then block until an STC flush synchronization has completed.
495 */
496int
497iommu_strbuf_flush_done(struct iommu_map_state *ims)
498{
499	struct strbuf_ctl *sb = ims->ims_sb;
500	struct strbuf_flush *sf = &ims->ims_flush;
501	struct timeval cur, flushtimeout;
502	struct timeval to = { 0, 500000 };
503	u_int64_t flush;
504	int timeout_started = 0;
505
506#ifdef DIAGNOSTIC
507	if (sb == NULL) {
508		panic("iommu_strbuf_flush_done: invalid flush buffer");
509	}
510#endif
511
512	mtx_enter(&sb->sb_mtx);
513
514	/*
515	 * Streaming buffer flushes:
516	 *
	 *   1 Tell the strbuf to flush by storing the va to strbuf_pgflush.
	 *   2 Store 0 in the flag.
	 *   3 Store the physical address of the flag in flushsync.
	 *   4 Wait until the flag becomes 0x1.
521	 *
522	 * If it takes more than .5 sec, something went very, very wrong.
523	 */
524
525	/*
526	 * If we're reading from ASI_PHYS_CACHED, then we'll write to
527	 * it too.  No need to tempt fate or learn about Si bugs or such.
528	 * FreeBSD just uses normal "volatile" reads/writes...
529	 */
530
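	/* Step 2: clear the flag word, via its physical address. */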
531	stxa(sf->sbf_flushpa, ASI_PHYS_CACHED, 0);
532
533	/*
	 * Ensure any previous strbuf operations are complete and that
	 * memory is initialized before the IOMMU uses it.
	 * Is this needed?  How are IO and memory operations ordered?
537	 */
538	__membar("#StoreStore");
539
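	/*
	 * Step 3: hand the STC the physical address of the flag; it will
	 * write 0x1 there once the flush has completed.
	 */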
540	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
541		    STRBUFREG(strbuf_flushsync), sf->sbf_flushpa);
542
543	DPRINTF(IDB_IOMMU,
544	    ("iommu_strbuf_flush_done: flush = %llx pa = %lx\n",
545		ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED), sf->sbf_flushpa));
546
547	__membar("#StoreLoad | #Lookaside");
548
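	/* Step 4: poll the flag until the STC sets it, or we time out. */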
	for (;;) {
550		int i;
551
552		/*
553		 * Try to shave a few instruction cycles off the average
		 * latency by only checking the elapsed time once every
		 * 1000 fetches.
556		 */
557		for (i = 0; i < 1000; ++i) {
558			__membar("#LoadLoad");
559			/* Bypass non-coherent D$ */
560			/* non-coherent...?   Huh? */
561			flush = ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED);
562
563			if (flush) {
564				DPRINTF(IDB_IOMMU,
565				    ("iommu_strbuf_flush_done: flushed\n"));
566				mtx_leave(&sb->sb_mtx);
567				return (0);
568			}
569		}
570
571		microtime(&cur);
572
573		if (timeout_started) {
574			if (timercmp(&cur, &flushtimeout, >))
575				panic("STC timeout at %lx (%lld)",
576				    sf->sbf_flushpa, flush);
577		} else {
578			timeradd(&cur, &to, &flushtimeout);
579
580			timeout_started = 1;
581
582			DPRINTF(IDB_IOMMU,
583			    ("iommu_strbuf_flush_done: flush = %llx pa = %lx "
584				"now=%llx:%lx until = %llx:%lx\n",
585				ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED),
586				sf->sbf_flushpa, cur.tv_sec, cur.tv_usec,
587				flushtimeout.tv_sec, flushtimeout.tv_usec));
588		}
589	}
590}
591
592/*
593 * IOMMU DVMA operations, common to SBus and PCI.
594 */
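
/*
 * These routines form the back end of bus_dma(9) for SBus and PCI
 * devices.  A typical driver-side sequence (illustrative only; the tag
 * and flags depend on the bus and device) ends up in the code below:
 *
 *	bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &map);
 *	bus_dmamap_load(t, map, buf, size, NULL, BUS_DMA_NOWAIT);
 *	bus_dmamap_sync(t, map, 0, size, BUS_DMASYNC_PREREAD);
 *	... device performs DMA ...
 *	bus_dmamap_sync(t, map, 0, size, BUS_DMASYNC_POSTREAD);
 *	bus_dmamap_unload(t, map);
 *	bus_dmamap_destroy(t, map);
 */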
595
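/*
 * Walk "t" up the _parent chain to the first tag that implements "fn",
 * so the generic part of each operation can be handed back to the
 * parent bus_dma tag.
 */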
596#define BUS_DMA_FIND_PARENT(t, fn)                                      \
597        if (t->_parent == NULL)                                         \
598                panic("null bus_dma parent (" #fn ")");                 \
599        for (t = t->_parent; t->fn == NULL; t = t->_parent)             \
600                if (t->_parent == NULL)                                 \
601                        panic("no bus_dma " #fn " located");
602
603int
604iommu_dvmamap_create(bus_dma_tag_t t, bus_dma_tag_t t0, struct strbuf_ctl *sb,
605    bus_size_t size, int nsegments, bus_size_t maxsegsz, bus_size_t boundary,
606    int flags, bus_dmamap_t *dmamap)
607{
608	int ret;
609	bus_dmamap_t map;
610	struct iommu_state *is = sb->sb_iommu;
611	struct iommu_map_state *ims;
612
613	BUS_DMA_FIND_PARENT(t, _dmamap_create);
614	ret = (*t->_dmamap_create)(t, t0, size, nsegments, maxsegsz, boundary,
615	    flags, &map);
616
617	if (ret)
618		return (ret);
619
620	if (flags & BUS_DMA_64BIT) {
621		map->_dm_cookie = is;
622		*dmamap = map;
623		return (0);
624	}
625
626	ims = iommu_iomap_create(atop(round_page(size)));
627
628	if (ims == NULL) {
629		bus_dmamap_destroy(t0, map);
630		return (ENOMEM);
631	}
632
633	ims->ims_sb = sb;
634	map->_dm_cookie = ims;
635
636#ifdef DIAGNOSTIC
637	if (ims->ims_sb == NULL)
638		panic("iommu_dvmamap_create: null sb");
639	if (ims->ims_sb->sb_iommu == NULL)
640		panic("iommu_dvmamap_create: null iommu");
641#endif
642	*dmamap = map;
643
644	return (0);
645}
646
647void
648iommu_dvmamap_destroy(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
649{
650	/*
651	 * The specification (man page) requires a loaded
652	 * map to be unloaded before it is destroyed.
653	 */
654	if (map->dm_nsegs)
655		bus_dmamap_unload(t0, map);
656
657	if (!ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
658	        if (map->_dm_cookie)
659			iommu_iomap_destroy(map->_dm_cookie);
660	}
661	map->_dm_cookie = NULL;
662
663	BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
664	(*t->_dmamap_destroy)(t, t0, map);
665}
666
667/*
668 * Load a contiguous kva buffer into a dmamap.  The physical pages are
669 * not assumed to be contiguous.  Two passes are made through the buffer
670 * and both call pmap_extract() for the same va->pa translations.  It
671 * is possible to run out of pa->dvma mappings; the code should be smart
672 * enough to resize the iomap (when the "flags" permit allocation).  It
673 * is trivial to compute the number of entries required (round the length
674 * up to the page size and then divide by the page size)...
675 */
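
/*
 * (A sketch of that computation, matching what iommu_dvmamap_create()
 * already sizes the iomap with:
 *
 *	n = atop(round_page(buflen));
 *
 * a buffer that does not begin on a page boundary may need one more.)
 */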
676int
677iommu_dvmamap_load(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
678    void *buf, bus_size_t buflen, struct proc *p, int flags)
679{
680	int err = 0;
681	bus_size_t sgsize;
682	u_long dvmaddr, sgstart, sgend;
683	bus_size_t align, boundary;
684	struct iommu_state *is;
685	struct iommu_map_state *ims;
686	pmap_t pmap;
687
688	/*
689	 * Make sure that on error condition we return "no valid mappings".
690	 */
691	KASSERTMSG(map->dm_nsegs == 0, "map still in use");
692
693	if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
694		unsigned long bypass;
695		int i;
696
697		is = map->_dm_cookie;
698		bypass = is->is_hw->ihw_bypass;
699
700		/* Bypass translation by the IOMMU. */
701
702		BUS_DMA_FIND_PARENT(t, _dmamap_load);
703		err = (*t->_dmamap_load)(t, t0, map, buf, buflen, p, flags);
704		if (err != 0)
705			return (err);
706
707		for (i = 0; i < map->dm_nsegs; i++)
708			map->dm_segs[i].ds_addr |= bypass;
709
710		return (0);
711	}
712
713	ims = map->_dm_cookie;
714	is = ims->ims_sb->sb_iommu;
715
716	if (buflen < 1 || buflen > map->_dm_size) {
717		DPRINTF(IDB_BUSDMA,
718		    ("iommu_dvmamap_load(): error %d > %d -- "
719		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
720		return (EINVAL);
721	}
722
723	/*
724	 * A boundary presented to bus_dmamem_alloc() takes precedence
725	 * over boundary in the map.
726	 */
727	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
728		boundary = map->_dm_boundary;
729	align = MAX(map->dm_segs[0]._ds_align, PAGE_SIZE);
730
731	pmap = p ? p->p_vmspace->vm_map.pmap : pmap_kernel();
732
733	/* Count up the total number of pages we need */
734	iommu_iomap_clear_pages(ims);
735	{ /* Scope */
736		bus_addr_t a, aend;
737		bus_addr_t addr = (bus_addr_t)buf;
738		int seg_len = buflen;
739
740		aend = round_page(addr + seg_len);
741		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
742			paddr_t pa;
743
744			if (pmap_extract(pmap, a, &pa) == FALSE)
745				panic("iomap pmap error addr 0x%lx", a);
746
747			err = iommu_iomap_insert_page(ims, pa);
748			if (err) {
749				printf("iomap insert error: %d for "
750				    "va 0x%lx pa 0x%lx "
751				    "(buf %p len %ld/%lx)\n",
752				    err, a, pa, buf, buflen, buflen);
753				iommu_dvmamap_print_map(t, is, map);
754				iommu_iomap_clear_pages(ims);
755				return (EFBIG);
756			}
757		}
758	}
759	if (flags & BUS_DMA_OVERRUN) {
760		err = iommu_iomap_insert_page(ims, is->is_scratch);
761		if (err) {
762			iommu_iomap_clear_pages(ims);
763			return (EFBIG);
764		}
765	}
766	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
767
768	mtx_enter(&is->is_mtx);
769	if (flags & BUS_DMA_24BIT) {
770		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
771		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
772	} else {
773		sgstart = is->is_dvmamap->ex_start;
774		sgend = is->is_dvmamap->ex_end;
775	}
776
777	/*
778	 * If our segment size is larger than the boundary we need to
779	 * split the transfer up into little pieces ourselves.
780	 */
781	err = extent_alloc_subregion_with_descr(is->is_dvmamap, sgstart, sgend,
782	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
783	    EX_NOWAIT | EX_BOUNDZERO, &ims->ims_er, (u_long *)&dvmaddr);
784	mtx_leave(&is->is_mtx);
785
786#ifdef DEBUG
787	if (err || (dvmaddr == (bus_addr_t)-1))	{
788		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
789		    (int)sgsize, flags);
790#ifdef DDB
791		if (iommudebug & IDB_BREAK)
792			db_enter();
793#endif
794	}
795#endif
796	if (err != 0) {
797		iommu_iomap_clear_pages(ims);
798		return (err);
799	}
800
801	/* Set the active DVMA map */
802	map->_dm_dvmastart = dvmaddr;
803	map->_dm_dvmasize = sgsize;
804
805	map->dm_mapsize = buflen;
806
807#ifdef DEBUG
808	iommu_dvmamap_validate_map(t, is, map);
809#endif
810
811	iommu_iomap_load_map(is, ims, dvmaddr, flags);
812
813	{ /* Scope */
814		bus_addr_t a, aend;
815		bus_addr_t addr = (bus_addr_t)buf;
816		int seg_len = buflen;
817
818		aend = round_page(addr + seg_len);
819		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
820			bus_addr_t pgstart;
821			bus_addr_t pgend;
822			paddr_t pa;
823			int pglen;
824
825			/* Yuck... Redoing the same pmap_extract... */
826			if (pmap_extract(pmap, a, &pa) == FALSE)
827				panic("iomap pmap error addr 0x%lx", a);
828
829			pgstart = pa | (MAX(a, addr) & PAGE_MASK);
830			pgend = pa | (MIN(a + PAGE_SIZE - 1,
831			    addr + seg_len - 1) & PAGE_MASK);
832			pglen = pgend - pgstart + 1;
833
834			if (pglen < 1)
835				continue;
836
837			err = iommu_dvmamap_append_range(t, map, pgstart,
838			    pglen, flags, boundary);
839			if (err == EFBIG)
840				break;
841			else if (err) {
842				printf("iomap load seg page: %d for "
843				    "va 0x%lx pa %lx (%lx - %lx) "
844				    "for %d/0x%x\n",
845				    err, a, pa, pgstart, pgend, pglen, pglen);
846				break;
847			}
848		}
849	}
850#ifdef DEBUG
851	iommu_dvmamap_validate_map(t, is, map);
852
853	if (err)
854		printf("**** iommu_dvmamap_load failed with error %d\n",
855		    err);
856
857	if (err || (iommudebug & IDB_PRINT_MAP)) {
858		iommu_dvmamap_print_map(t, is, map);
859#ifdef DDB
860		if (iommudebug & IDB_BREAK)
861			db_enter();
862#endif
863	}
864#endif
865	if (err)
866		iommu_dvmamap_unload(t, t0, map);
867
868	return (err);
869}
870
871/*
872 * Load a dvmamap from an array of segs or an mlist (if the first
 * "segs" entry's mlist is non-null).  It calls iommu_dvmamap_load_seg()
874 * or iommu_dvmamap_load_mlist() for part of the 2nd pass through the
875 * mapping.  This is ugly.  A better solution would probably be to have
876 * function pointers for implementing the traversal.  That way, there
877 * could be one core load routine for each of the three required algorithms
878 * (buffer, seg, and mlist).  That would also mean that the traversal
879 * algorithm would then only need one implementation for each algorithm
880 * instead of two (one for populating the iomap and one for populating
881 * the dvma map).
882 */
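
/*
 * (Purely illustrative sketch of that idea: a single core walker fed a
 * callback along the lines of
 *
 *	int (*visit)(void *cookie, paddr_t pa, psize_t len);
 *
 * could serve the buffer, segs and mlist flavours alike.)
 */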
883int
884iommu_dvmamap_load_raw(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
885    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
886{
887	int i;
888	int left;
889	int err = 0;
890	bus_size_t sgsize;
891	bus_size_t boundary, align;
892	u_long dvmaddr, sgstart, sgend;
893	struct iommu_state *is;
894	struct iommu_map_state *ims;
895
896	KASSERTMSG(map->dm_nsegs == 0, "map still in use");
897
898	/*
899	 * A boundary presented to bus_dmamem_alloc() takes precedence
900	 * over boundary in the map.
901	 */
902	if ((boundary = segs[0]._ds_boundary) == 0)
903		boundary = map->_dm_boundary;
904
905	if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
906		unsigned long bypass;
907
908		is = map->_dm_cookie;
909		bypass = is->is_hw->ihw_bypass;
910
911		/* Bypass translation by the IOMMU. */
912		for (i = 0; i < nsegs; i++) {
913			err = iommu_dvmamap_insert(t, map,
914			    bypass | segs[i].ds_addr, segs[i].ds_len,
915			    0, boundary);
916			if (err != 0) {
917				map->dm_nsegs = 0;
918				return (err);
919			}
920		}
921
922		map->dm_mapsize = size;
923
924		return (0);
925	}
926
927	ims = map->_dm_cookie;
928	is = ims->ims_sb->sb_iommu;
929
930	align = MAX(segs[0]._ds_align, PAGE_SIZE);
931
932	/*
933	 * Make sure that on error condition we return "no valid mappings".
934	 */
935	map->dm_nsegs = 0;
936
937	iommu_iomap_clear_pages(ims);
938	if (segs[0]._ds_mlist) {
939		struct pglist *mlist = segs[0]._ds_mlist;
940		struct vm_page *m;
941		for (m = TAILQ_FIRST(mlist); m != NULL;
942		    m = TAILQ_NEXT(m,pageq)) {
943			err = iommu_iomap_insert_page(ims, VM_PAGE_TO_PHYS(m));
944
			if (err) {
946				printf("iomap insert error: %d for "
947				    "pa 0x%lx\n", err, VM_PAGE_TO_PHYS(m));
948				iommu_dvmamap_print_map(t, is, map);
949				iommu_iomap_clear_pages(ims);
950				return (EFBIG);
951			}
952		}
953	} else {
954		/* Count up the total number of pages we need */
955		for (i = 0, left = size; left > 0 && i < nsegs; i++) {
956			bus_addr_t a, aend;
957			bus_size_t len = segs[i].ds_len;
958			bus_addr_t addr = segs[i].ds_addr;
959			int seg_len = MIN(left, len);
960
961			if (len < 1)
962				continue;
963
964			aend = round_page(addr + seg_len);
965			for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
966
967				err = iommu_iomap_insert_page(ims, a);
968				if (err) {
969					printf("iomap insert error: %d for "
970					    "pa 0x%lx\n", err, a);
971					iommu_dvmamap_print_map(t, is, map);
972					iommu_iomap_clear_pages(ims);
973					return (EFBIG);
974				}
975			}
976
977			left -= seg_len;
978		}
979	}
980	if (flags & BUS_DMA_OVERRUN) {
981		err = iommu_iomap_insert_page(ims, is->is_scratch);
982		if (err) {
983			iommu_iomap_clear_pages(ims);
984			return (EFBIG);
985		}
986	}
987	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
988
989	mtx_enter(&is->is_mtx);
990	if (flags & BUS_DMA_24BIT) {
991		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
992		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
993	} else {
994		sgstart = is->is_dvmamap->ex_start;
995		sgend = is->is_dvmamap->ex_end;
996	}
997
998	/*
999	 * If our segment size is larger than the boundary we need to
1000	 * split the transfer up into little pieces ourselves.
1001	 */
1002	err = extent_alloc_subregion_with_descr(is->is_dvmamap, sgstart, sgend,
1003	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
1004	    EX_NOWAIT | EX_BOUNDZERO, &ims->ims_er, (u_long *)&dvmaddr);
1005	mtx_leave(&is->is_mtx);
1006
1007	if (err != 0) {
1008		iommu_iomap_clear_pages(ims);
1009		return (err);
1010	}
1011
1012#ifdef DEBUG
1013	if (dvmaddr == (bus_addr_t)-1)	{
1014		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) "
1015		    "failed!\n", (int)sgsize, flags);
1016#ifdef DDB
1017		if (iommudebug & IDB_BREAK)
1018			db_enter();
1019#else
1020		panic("");
1021#endif
1022	}
1023#endif
1024
1025	/* Set the active DVMA map */
1026	map->_dm_dvmastart = dvmaddr;
1027	map->_dm_dvmasize = sgsize;
1028
1029	map->dm_mapsize = size;
1030
1031#ifdef DEBUG
1032	iommu_dvmamap_validate_map(t, is, map);
1033#endif
1034
1035	iommu_iomap_load_map(is, ims, dvmaddr, flags);
1036
1037	if (segs[0]._ds_mlist)
1038		err = iommu_dvmamap_load_mlist(t, is, map, segs[0]._ds_mlist,
1039		    flags, size, boundary);
1040	else
1041		err = iommu_dvmamap_load_seg(t, is, map, segs, nsegs,
1042		    flags, size, boundary);
1043
1044#ifdef DEBUG
1045	/* The map should be valid even if the load failed */
1046	if (iommu_dvmamap_validate_map(t, is, map)) {
1047		printf("load size %ld/0x%lx\n", size, size);
1048		if (segs[0]._ds_mlist)
1049			printf("mlist %p\n", segs[0]._ds_mlist);
1050		else  {
1051			long tot_len = 0;
1052			long clip_len = 0;
1053			printf("segs %p nsegs %d\n", segs, nsegs);
1054
1055			left = size;
1056			for(i = 0; i < nsegs; i++) {
1057				bus_size_t len = segs[i].ds_len;
1058				bus_addr_t addr = segs[i].ds_addr;
1059				int seg_len = MIN(left, len);
1060
1061				printf("addr %lx len %ld/0x%lx seg_len "
1062				    "%d/0x%x left %d/0x%x\n", addr,
1063				    len, len, seg_len, seg_len, left, left);
1064
1065				left -= seg_len;
1066
1067				clip_len += seg_len;
1068				tot_len += segs[i].ds_len;
1069			}
1070			printf("total length %ld/0x%lx total seg. "
1071			    "length %ld/0x%lx\n", tot_len, tot_len, clip_len,
1072			    clip_len);
1073		}
1074
1075		if (err == 0)
1076			err = 1;
1077	}
1078
1079	if (err)
1080		printf("**** iommu_dvmamap_load_raw failed with error %d\n",
1081		    err);
1082
1083	if (err || (iommudebug & IDB_PRINT_MAP)) {
1084		iommu_dvmamap_print_map(t, is, map);
1085#ifdef DDB
1086		if (iommudebug & IDB_BREAK)
1087			db_enter();
1088#endif
1089	}
1090#endif
1091	if (err)
1092		iommu_dvmamap_unload(t, t0, map);
1093
1094	return (err);
1095}
1096
1097/*
1098 * Insert a range of addresses into a loaded map respecting the specified
1099 * boundary and alignment restrictions.  The range is specified by its
1100 * physical address and length.  The range cannot cross a page boundary.
 * This code (along with most of the rest of the functions in this file)
1102 * assumes that the IOMMU page size is equal to PAGE_SIZE.
1103 */
1104int
1105iommu_dvmamap_append_range(bus_dma_tag_t t, bus_dmamap_t map, paddr_t pa,
1106    bus_size_t length, int flags, bus_size_t boundary)
1107{
1108	struct iommu_map_state *ims = map->_dm_cookie;
1109	bus_addr_t sgstart = iommu_iomap_translate(ims, pa);
1110
1111	return (iommu_dvmamap_insert(t, map, sgstart, length, flags, boundary));
1112}
1113
1114int
1115iommu_dvmamap_insert(bus_dma_tag_t t, bus_dmamap_t map,
1116    bus_addr_t sgstart, bus_size_t length, int flags, bus_size_t boundary)
1117{
1118	bus_addr_t sgend = sgstart + length - 1;
1119	bus_addr_t bd_mask;
1120	bus_dma_segment_t *seg = NULL;
1121	int i = map->dm_nsegs;
1122
1123#ifdef DIAGNOSTIC
1124	if (sgstart == 0 || sgstart > sgend) {
1125		printf("append range invalid mapping for "
1126		    "0x%lx - 0x%lx\n", sgstart, sgend);
1127		map->dm_nsegs = 0;
1128		return (EINVAL);
1129	}
1130#endif
1131
1132#ifdef DEBUG
1133	if (trunc_page(sgstart) != trunc_page(sgend)) {
1134		printf("append range crossing page boundary! "
1135		    "length %ld/0x%lx sgstart %lx sgend %lx\n",
1136		    length, length, sgstart, sgend);
1137	}
1138#endif
1139
1140	/*
1141	 * We will attempt to merge this range with the previous entry
1142	 * (if there is one).
1143	 */
1144	if (i > 0) {
1145		seg = &map->dm_segs[i - 1];
1146		if (sgstart == seg->ds_addr + seg->ds_len &&
1147		    length + seg->ds_len <= map->_dm_maxsegsz) {
1148			length += seg->ds_len;
1149			sgstart = seg->ds_addr;
1150			sgend = sgstart + length - 1;
1151		} else
1152			seg = NULL;
1153	}
1154
1155	if (seg == NULL) {
1156		seg = &map->dm_segs[i];
1157		if (++i > map->_dm_segcnt) {
1158			map->dm_nsegs = 0;
1159			return (EFBIG);
1160		}
1161	}
1162
1163	/*
1164	 * At this point, "i" is the index of the *next* bus_dma_segment_t
1165	 * (the segment count, aka map->dm_nsegs) and "seg" points to the
1166	 * *current* entry.  "length", "sgstart", and "sgend" reflect what
1167	 * we intend to put in "*seg".  No assumptions should be made about
	 * the contents of "*seg".  Only a "boundary" issue can change this,
	 * and "boundary" is often zero, so explicitly test for that case
1170	 * (the test is strictly an optimization).
1171	 */
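	/*
	 * For example, with a 64KB boundary (bd_mask of ~0xffff), a merged
	 * range 0xf000-0x10fff is emitted as two segments:
	 * 0xf000/0x1000 and 0x10000/0x1000.
	 */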
1172	if (boundary != 0) {
1173		bd_mask = ~(boundary - 1);
1174
1175		while ((sgstart & bd_mask) != (sgend & bd_mask)) {
1176			/*
1177			 * We are crossing a boundary so fill in the current
1178			 * segment with as much as possible, then grab a new
1179			 * one.
1180			 */
1181
1182			seg->ds_addr = sgstart;
1183			seg->ds_len = boundary - (sgstart & ~bd_mask);
1184
1185			sgstart += seg->ds_len; /* sgend stays the same */
1186			length -= seg->ds_len;
1187
1188			seg = &map->dm_segs[i];
1189			if (++i > map->_dm_segcnt) {
1190				map->dm_nsegs = 0;
1191				return (EFBIG);
1192			}
1193		}
1194	}
1195
1196	seg->ds_addr = sgstart;
1197	seg->ds_len = length;
1198	map->dm_nsegs = i;
1199
1200	return (0);
1201}
1202
1203/*
1204 * Populate the iomap from a bus_dma_segment_t array.  See note for
 * iommu_dvmamap_load() regarding page entry exhaustion of the iomap.
1206 * This is less of a problem for load_seg, as the number of pages
1207 * is usually similar to the number of segments (nsegs).
1208 */
1209int
1210iommu_dvmamap_load_seg(bus_dma_tag_t t, struct iommu_state *is,
1211    bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int flags,
1212    bus_size_t size, bus_size_t boundary)
1213{
1214	int i;
1215	int left;
1216	int seg;
1217
1218	/*
	 * This "segs" array is made up of individual physical
1220	 * segments, probably by _bus_dmamap_load_uio() or
1221	 * _bus_dmamap_load_mbuf().  Ignore the mlist and
1222	 * load each one individually.
1223	 */
1224
1225	/*
1226	 * Keep in mind that each segment could span
1227	 * multiple pages and that these are not always
1228	 * adjacent. The code is no longer adding dvma
1229	 * aliases to the IOMMU.  The STC will not cross
	 * page boundaries anyway and an IOMMU table walk
1231	 * vs. what may be a streamed PCI DMA to a ring
1232	 * descriptor is probably a wash.  It eases TLB
1233	 * pressure and in the worst possible case, it is
	 * only as bad as a non-IOMMUed architecture.  More
1235	 * importantly, the code is not quite as hairy.
1236	 * (It's bad enough as it is.)
1237	 */
1238	left = size;
1239	seg = 0;
1240	for (i = 0; left > 0 && i < nsegs; i++) {
1241		bus_addr_t a, aend;
1242		bus_size_t len = segs[i].ds_len;
1243		bus_addr_t addr = segs[i].ds_addr;
1244		int seg_len = MIN(left, len);
1245
1246		if (len < 1)
1247			continue;
1248
1249		aend = round_page(addr + seg_len);
1250		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
1251			bus_addr_t pgstart;
1252			bus_addr_t pgend;
1253			int pglen;
1254			int err;
1255
1256			pgstart = MAX(a, addr);
1257			pgend = MIN(a + PAGE_SIZE - 1, addr + seg_len - 1);
1258			pglen = pgend - pgstart + 1;
1259
1260			if (pglen < 1)
1261				continue;
1262
1263			err = iommu_dvmamap_append_range(t, map, pgstart,
1264			    pglen, flags, boundary);
1265			if (err == EFBIG)
1266				return (err);
1267			if (err) {
1268				printf("iomap load seg page: %d for "
				    "pa 0x%lx (%lx - %lx) for %d/%x\n",
1270				    err, a, pgstart, pgend, pglen, pglen);
1271				return (err);
1272			}
1273
1274		}
1275
1276		left -= seg_len;
1277	}
1278	return (0);
1279}
1280
1281/*
1282 * Populate the iomap from an mlist.  See note for iommu_dvmamap_load()
1283 * regarding page entry exhaustion of the iomap.
1284 */
1285int
1286iommu_dvmamap_load_mlist(bus_dma_tag_t t, struct iommu_state *is,
1287    bus_dmamap_t map, struct pglist *mlist, int flags,
1288    bus_size_t size, bus_size_t boundary)
1289{
1290	struct vm_page *m;
1291	paddr_t pa;
1292	int err;
1293
1294	/*
1295	 * This was allocated with bus_dmamem_alloc.
1296	 * The pages are on an `mlist'.
1297	 */
1298	for (m = TAILQ_FIRST(mlist); m != NULL; m = TAILQ_NEXT(m,pageq)) {
1299		pa = VM_PAGE_TO_PHYS(m);
1300
1301		err = iommu_dvmamap_append_range(t, map, pa,
1302		    MIN(PAGE_SIZE, size), flags, boundary);
1303		if (err == EFBIG)
1304			return (err);
1305		if (err) {
1306			printf("iomap load seg page: %d for pa 0x%lx "
			    "(%lx - %lx) for %d/%x\n", err, pa, pa,
1308			    pa + PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1309			return (err);
1310		}
1311		if (size < PAGE_SIZE)
1312			break;
1313		size -= PAGE_SIZE;
1314	}
1315
1316	return (0);
1317}
1318
1319/*
1320 * Unload a dvmamap.
1321 */
1322void
1323iommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
1324{
1325	struct iommu_state *is;
1326	struct iommu_map_state *ims;
1327	bus_addr_t dvmaddr = map->_dm_dvmastart;
1328	bus_size_t sgsize = map->_dm_dvmasize;
1329	int error;
1330
1331	if (ISSET(map->_dm_flags, BUS_DMA_64BIT)) {
1332		bus_dmamap_unload(t->_parent, map);
1333		return;
1334	}
1335
1336	ims = map->_dm_cookie;
1337	is = ims->ims_sb->sb_iommu;
1338
1339	/* Flush the iommu */
1340#ifdef DEBUG
1341	if (dvmaddr == 0) {
1342		printf("iommu_dvmamap_unload: No dvmastart\n");
1343#ifdef DDB
1344		if (iommudebug & IDB_BREAK)
1345			db_enter();
1346#endif
1347		return;
1348	}
1349
1350	iommu_dvmamap_validate_map(t, is, map);
1351
1352	if (iommudebug & IDB_PRINT_MAP)
1353		iommu_dvmamap_print_map(t, is, map);
1354#endif /* DEBUG */
1355
1356	/* Remove the IOMMU entries */
1357	iommu_iomap_unload_map(is, ims);
1358
1359	/* Clear the iomap */
1360	iommu_iomap_clear_pages(ims);
1361
1362	bus_dmamap_unload(t->_parent, map);
1363
1364	/* Mark the mappings as invalid. */
1365	map->dm_mapsize = 0;
1366	map->dm_nsegs = 0;
1367
1368	mtx_enter(&is->is_mtx);
1369	error = extent_free(is->is_dvmamap, dvmaddr, sgsize, EX_NOWAIT);
1370	map->_dm_dvmastart = 0;
1371	map->_dm_dvmasize = 0;
1372	mtx_leave(&is->is_mtx);
1373	if (error != 0)
		printf("warning: %ld bytes of DVMA space lost\n", sgsize);
1375}
1376
1377#ifdef DEBUG
1378/*
1379 * Perform internal consistency checking on a dvmamap.
1380 */
1381int
1382iommu_dvmamap_validate_map(bus_dma_tag_t t, struct iommu_state *is,
1383    bus_dmamap_t map)
1384{
1385	int err = 0;
1386	int seg;
1387
1388	if (trunc_page(map->_dm_dvmastart) != map->_dm_dvmastart) {
1389		printf("**** dvmastart address not page aligned: %lx",
1390			map->_dm_dvmastart);
1391		err = 1;
1392	}
1393	if (trunc_page(map->_dm_dvmasize) != map->_dm_dvmasize) {
1394		printf("**** dvmasize not a multiple of page size: %lx",
1395			map->_dm_dvmasize);
1396		err = 1;
1397	}
1398	if (map->_dm_dvmastart < is->is_dvmabase ||
1399	    (round_page(map->_dm_dvmastart + map->_dm_dvmasize) - 1) >
1400	    is->is_dvmaend) {
1401		printf("dvmaddr %lx len %lx out of range %x - %x\n",
1402			    map->_dm_dvmastart, map->_dm_dvmasize,
1403			    is->is_dvmabase, is->is_dvmaend);
1404		err = 1;
1405	}
1406	for (seg = 0; seg < map->dm_nsegs; seg++) {
1407		if (map->dm_segs[seg].ds_addr == 0 ||
1408		    map->dm_segs[seg].ds_len == 0) {
1409			printf("seg %d null segment dvmaddr %lx len %lx for "
1410			    "range %lx len %lx\n",
1411			    seg,
1412			    map->dm_segs[seg].ds_addr,
1413			    map->dm_segs[seg].ds_len,
1414			    map->_dm_dvmastart, map->_dm_dvmasize);
1415			err = 1;
1416		} else if (map->dm_segs[seg].ds_addr < map->_dm_dvmastart ||
1417		    round_page(map->dm_segs[seg].ds_addr +
1418			map->dm_segs[seg].ds_len) >
1419		    map->_dm_dvmastart + map->_dm_dvmasize) {
1420			printf("seg %d dvmaddr %lx len %lx out of "
1421			    "range %lx len %lx\n",
1422			    seg,
1423			    map->dm_segs[seg].ds_addr,
1424			    map->dm_segs[seg].ds_len,
1425			    map->_dm_dvmastart, map->_dm_dvmasize);
1426			err = 1;
1427		}
1428	}
1429
1430	if (err) {
1431		iommu_dvmamap_print_map(t, is, map);
1432#if defined(DDB) && defined(DEBUG)
1433		if (iommudebug & IDB_BREAK)
1434			db_enter();
1435#endif
1436	}
1437
1438	return (err);
1439}
1440#endif /* DEBUG */
1441
1442void
1443iommu_dvmamap_print_map(bus_dma_tag_t t, struct iommu_state *is,
1444    bus_dmamap_t map)
1445{
1446	int seg, i;
1447	long full_len, source_len;
1448	struct mbuf *m;
1449
1450	printf("DVMA %x for %x, mapping %p: dvstart %lx dvsize %lx "
1451	    "size %ld/%lx maxsegsz %lx boundary %lx segcnt %d "
1452	    "flags %x type %d source %p "
1453	    "cookie %p mapsize %lx nsegs %d\n",
1454	    is ? is->is_dvmabase : 0, is ? is->is_dvmaend : 0, map,
1455	    map->_dm_dvmastart, map->_dm_dvmasize,
1456	    map->_dm_size, map->_dm_size, map->_dm_maxsegsz, map->_dm_boundary,
1457	    map->_dm_segcnt, map->_dm_flags, map->_dm_type,
1458	    map->_dm_source, map->_dm_cookie, map->dm_mapsize,
1459	    map->dm_nsegs);
1460
1461	full_len = 0;
1462	for (seg = 0; seg < map->dm_nsegs; seg++) {
1463		printf("seg %d dvmaddr %lx pa %lx len %lx (tte %llx)\n",
1464		    seg, map->dm_segs[seg].ds_addr,
1465		    is ? iommu_extract(is, map->dm_segs[seg].ds_addr) : 0,
1466		    map->dm_segs[seg].ds_len,
1467		    is ? iommu_lookup_tte(is, map->dm_segs[seg].ds_addr) : 0);
1468		full_len += map->dm_segs[seg].ds_len;
1469	}
1470	printf("total length = %ld/0x%lx\n", full_len, full_len);
1471
1472	if (map->_dm_source) switch (map->_dm_type) {
1473	case _DM_TYPE_MBUF:
1474		m = map->_dm_source;
1475		if (m->m_flags & M_PKTHDR)
1476			printf("source PKTHDR mbuf (%p) hdr len = %d/0x%x:\n",
1477			    m, m->m_pkthdr.len, m->m_pkthdr.len);
1478		else
1479			printf("source mbuf (%p):\n", m);
1480
1481		source_len = 0;
1482		for ( ; m; m = m->m_next) {
1483			vaddr_t vaddr = mtod(m, vaddr_t);
1484			long len = m->m_len;
1485			paddr_t pa;
1486
1487			if (pmap_extract(pmap_kernel(), vaddr, &pa))
1488				printf("kva %lx pa %lx len %ld/0x%lx\n",
1489				    vaddr, pa, len, len);
1490			else
1491				printf("kva %lx pa <invalid> len %ld/0x%lx\n",
1492				    vaddr, len, len);
1493
1494			source_len += len;
1495		}
1496
1497		if (full_len != source_len)
1498			printf("mbuf length %ld/0x%lx is %s than mapping "
1499			    "length %ld/0x%lx\n", source_len, source_len,
1500			    (source_len > full_len) ? "greater" : "less",
1501			    full_len, full_len);
1502		else
1503			printf("mbuf length %ld/0x%lx\n", source_len,
1504			    source_len);
1505		break;
1506	case _DM_TYPE_LOAD:
1507	case _DM_TYPE_SEGS:
1508	case _DM_TYPE_UIO:
1509	default:
1510		break;
1511	}
1512
1513	if (!ISSET(map->_dm_flags, BUS_DMA_64BIT) && map->_dm_cookie != NULL) {
1514		struct iommu_map_state *ims = map->_dm_cookie;
1515		struct iommu_page_map *ipm = &ims->ims_map;
1516
1517		printf("page map (%p) of size %d with %d entries\n",
1518		    ipm, ipm->ipm_maxpage, ipm->ipm_pagecnt);
1519		for (i = 0; i < ipm->ipm_pagecnt; ++i) {
1520			struct iommu_page_entry *e = &ipm->ipm_map[i];
1521			printf("%d: vmaddr 0x%lx pa 0x%lx\n", i,
1522			    e->ipe_va, e->ipe_pa);
1523		}
1524	} else
1525		printf("iommu map state (cookie) is NULL\n");
1526}
1527
1528void
1529_iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
1530	bus_addr_t offset, bus_size_t len, int ops)
1531{
1532	struct iommu_state *is;
1533	struct iommu_map_state *ims = map->_dm_cookie;
1534	struct strbuf_ctl *sb;
1535	bus_size_t count;
1536	int i, needsflush = 0;
1537
1538	sb = ims->ims_sb;
1539	is = sb->sb_iommu;
1540
1541	for (i = 0; i < map->dm_nsegs; i++) {
1542		if (offset < map->dm_segs[i].ds_len)
1543			break;
1544		offset -= map->dm_segs[i].ds_len;
1545	}
1546
1547	if (i == map->dm_nsegs)
1548		panic("iommu_dvmamap_sync: too short %lu", offset);
1549
1550	for (; len > 0 && i < map->dm_nsegs; i++) {
1551		count = MIN(map->dm_segs[i].ds_len - offset, len);
1552		if (count > 0 && iommu_dvmamap_sync_range(sb,
1553		    map->dm_segs[i].ds_addr + offset, count))
1554			needsflush = 1;
1555		len -= count;
1556	}
1557
1558#ifdef DIAGNOSTIC
1559	if (i == map->dm_nsegs && len > 0)
1560		panic("iommu_dvmamap_sync: leftover %lu", len);
1561#endif
1562
1563	if (needsflush)
1564		iommu_strbuf_flush_done(ims);
1565}
1566
1567void
1568iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
1569    bus_addr_t offset, bus_size_t len, int ops)
1570{
1571	struct iommu_map_state *ims;
1572
1573	if (len == 0)
1574		return;
1575
1576	if (map->_dm_flags & BUS_DMA_64BIT) {
1577		if (ops & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD))
1578			__membar("#MemIssue");
1579		return;
1580	}
1581
1582	ims = map->_dm_cookie;
1583
1584	if (ops & BUS_DMASYNC_PREWRITE)
1585		__membar("#MemIssue");
1586
1587	if ((ims->ims_flags & IOMMU_MAP_STREAM) &&
1588	    (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)))
1589		_iommu_dvmamap_sync(t, t0, map, offset, len, ops);
1590
1591	if (ops & BUS_DMASYNC_POSTREAD)
1592		__membar("#MemIssue");
1593}
1594
1595/*
1596 * Flush an individual dma segment, returns non-zero if the streaming buffers
1597 * need flushing afterwards.
1598 */
1599int
1600iommu_dvmamap_sync_range(struct strbuf_ctl *sb, bus_addr_t va, bus_size_t len)
1601{
1602	bus_addr_t vaend;
1603#ifdef DIAGNOSTIC
1604	struct iommu_state *is = sb->sb_iommu;
1605
1606	if (va < is->is_dvmabase || va > is->is_dvmaend)
1607		panic("invalid va: %llx", (long long)va);
1608
1609	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
1610		printf("iommu_dvmamap_sync_range: attempting to flush "
1611		    "non-streaming entry\n");
1612		return (0);
1613	}
1614#endif
1615
1616	vaend = (va + len + PAGE_MASK) & ~PAGE_MASK;
1617	va &= ~PAGE_MASK;
1618
1619#ifdef DIAGNOSTIC
1620	if (va < is->is_dvmabase || (vaend - 1) > is->is_dvmaend)
1621		panic("invalid va range: %llx to %llx (%x to %x)",
1622		    (long long)va, (long long)vaend,
1623		    is->is_dvmabase,
1624		    is->is_dvmaend);
1625#endif
1626
1627	for ( ; va <= vaend; va += PAGE_SIZE) {
1628		DPRINTF(IDB_BUSDMA,
1629		    ("iommu_dvmamap_sync_range: flushing va %p\n",
1630		    (void *)(u_long)va));
1631		iommu_strbuf_flush(sb, va);
1632	}
1633
1634	return (1);
1635}
1636
1637int
1638iommu_dvmamem_alloc(bus_dma_tag_t t, bus_dma_tag_t t0, bus_size_t size,
1639    bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs,
1640    int nsegs, int *rsegs, int flags)
1641{
1642
1643	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx "
1644	    "bound %llx segp %p flags %d\n", (unsigned long long)size,
1645	    (unsigned long long)alignment, (unsigned long long)boundary,
1646	    segs, flags));
1647
1648	if ((flags & BUS_DMA_64BIT) == 0)
1649		flags |= BUS_DMA_DVMA;
1650
1651	BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
1652	return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
1653	    segs, nsegs, rsegs, flags));
1654}
1655
1656void
1657iommu_dvmamem_free(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dma_segment_t *segs,
1658    int nsegs)
1659{
1660
1661	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
1662	    segs, nsegs));
1663	BUS_DMA_FIND_PARENT(t, _dmamem_free);
1664	(*t->_dmamem_free)(t, t0, segs, nsegs);
1665}
1666
1667/*
1668 * Create a new iomap.
1669 */
1670struct iommu_map_state *
1671iommu_iomap_create(int n)
1672{
1673	struct iommu_map_state *ims;
1674	struct strbuf_flush *sbf;
1675	vaddr_t va;
1676
1677	/* Safety for heavily fragmented data, such as mbufs */
1678	n += 4;
1679	if (n < 16)
1680		n = 16;
1681
1682	ims = malloc(sizeof(*ims) + (n - 1) * sizeof(ims->ims_map.ipm_map[0]),
1683		M_DEVBUF, M_NOWAIT | M_ZERO);
1684	if (ims == NULL)
1685		return (NULL);
1686
1687	/* Initialize the map. */
1688	ims->ims_map.ipm_maxpage = n;
1689	SPLAY_INIT(&ims->ims_map.ipm_tree);
1690
1691	/* Initialize the flush area. */
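	/*
	 * The completion flag handed to the STC must sit at a
	 * 64-byte-aligned physical address, hence the masking below.
	 */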
1692	sbf = &ims->ims_flush;
1693	va = (vaddr_t)&sbf->sbf_area[0x40];
1694	va &= ~0x3f;
1695	pmap_extract(pmap_kernel(), va, &sbf->sbf_flushpa);
1696	sbf->sbf_flush = (void *)va;
1697
1698	return (ims);
1699}
1700
1701/*
1702 * Destroy an iomap.
1703 */
1704void
1705iommu_iomap_destroy(struct iommu_map_state *ims)
1706{
1707#ifdef DIAGNOSTIC
1708	if (ims->ims_map.ipm_pagecnt > 0)
1709		printf("iommu_iomap_destroy: %d page entries in use\n",
1710		    ims->ims_map.ipm_pagecnt);
1711#endif
1712
1713	free(ims, M_DEVBUF, 0);
1714}
1715
1716/*
1717 * Utility function used by splay tree to order page entries by pa.
1718 */
1719static inline int
1720iomap_compare(struct iommu_page_entry *a, struct iommu_page_entry *b)
1721{
1722	return ((a->ipe_pa > b->ipe_pa) ? 1 :
1723		(a->ipe_pa < b->ipe_pa) ? -1 : 0);
1724}
1725
1726SPLAY_PROTOTYPE(iommu_page_tree, iommu_page_entry, ipe_node, iomap_compare);
1727
1728SPLAY_GENERATE(iommu_page_tree, iommu_page_entry, ipe_node, iomap_compare);
1729
1730/*
1731 * Insert a pa entry in the iomap.
1732 */
1733int
1734iommu_iomap_insert_page(struct iommu_map_state *ims, paddr_t pa)
1735{
1736	struct iommu_page_map *ipm = &ims->ims_map;
1737	struct iommu_page_entry *e;
1738
1739	if (ipm->ipm_pagecnt >= ipm->ipm_maxpage) {
1740		struct iommu_page_entry ipe;
1741
1742		ipe.ipe_pa = pa;
1743		if (SPLAY_FIND(iommu_page_tree, &ipm->ipm_tree, &ipe))
1744			return (0);
1745
1746		return (ENOMEM);
1747	}
1748
1749	e = &ipm->ipm_map[ipm->ipm_pagecnt];
1750
1751	e->ipe_pa = pa;
1752	e->ipe_va = 0;
1753
1754	e = SPLAY_INSERT(iommu_page_tree, &ipm->ipm_tree, e);
1755
1756	/* Duplicates are okay, but only count them once. */
1757	if (e)
1758		return (0);
1759
1760	++ipm->ipm_pagecnt;
1761
1762	return (0);
1763}
1764
1765/*
 * Load the iomap by filling in each pa->va mapping and inserting it
1767 * into the IOMMU tables.
1768 */
1769void
1770iommu_iomap_load_map(struct iommu_state *is, struct iommu_map_state *ims,
1771    bus_addr_t vmaddr, int flags)
1772{
1773	struct iommu_page_map *ipm = &ims->ims_map;
1774	struct iommu_page_entry *e;
1775	struct strbuf_ctl *sb = ims->ims_sb;
1776	int i, slot;
1777
1778	if (sb->sb_flush == NULL)
1779		flags &= ~BUS_DMA_STREAMING;
1780
1781	if (flags & BUS_DMA_STREAMING)
1782		ims->ims_flags |= IOMMU_MAP_STREAM;
1783	else
1784		ims->ims_flags &= ~IOMMU_MAP_STREAM;
1785
1786	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
1787		e->ipe_va = vmaddr;
1788		iommu_enter(is, sb, e->ipe_va, e->ipe_pa, flags);
1789
1790		/* Flush cache if necessary. */
1791		slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
1792		if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
1793		    (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
1794			IOMMUREG_WRITE(is, iommu_cache_flush,
1795			    is->is_ptsb + slot * 8);
1796
1797		vmaddr += PAGE_SIZE;
1798	}
1799}
1800
1801/*
1802 * Remove the iomap from the IOMMU.
1803 */
1804void
1805iommu_iomap_unload_map(struct iommu_state *is, struct iommu_map_state *ims)
1806{
1807	struct iommu_page_map *ipm = &ims->ims_map;
1808	struct iommu_page_entry *e;
1809	struct strbuf_ctl *sb = ims->ims_sb;
1810	int i, slot;
1811
1812	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
1813		iommu_remove(is, sb, e->ipe_va);
1814
1815		/* Flush cache if necessary. */
1816		slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
1817		if (ISSET(is->is_hw->ihw_flags, IOMMU_HW_FLUSH_CACHE) &&
1818		    (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
1819			IOMMUREG_WRITE(is, iommu_cache_flush,
1820			    is->is_ptsb + slot * 8);
1821	}
1822}
1823
1824/*
1825 * Translate a physical address (pa) into a DVMA address.
1826 */
1827bus_addr_t
1828iommu_iomap_translate(struct iommu_map_state *ims, paddr_t pa)
1829{
1830	struct iommu_page_map *ipm = &ims->ims_map;
1831	struct iommu_page_entry *e;
1832	struct iommu_page_entry pe;
1833	paddr_t offset = pa & PAGE_MASK;
1834
1835	pe.ipe_pa = trunc_page(pa);
1836
1837	e = SPLAY_FIND(iommu_page_tree, &ipm->ipm_tree, &pe);
1838
1839	if (e == NULL)
1840		return (0);
1841
1842	return (e->ipe_va | offset);
1843}
1844
1845/*
1846 * Clear the iomap table and tree.
1847 */
1848void
1849iommu_iomap_clear_pages(struct iommu_map_state *ims)
1850{
1851	ims->ims_map.ipm_pagecnt = 0;
1852	SPLAY_INIT(&ims->ims_map.ipm_tree);
1853}
1854
1855