/*	$NetBSD: iommu.c,v 1.105 2011/10/08 08:49:07 nakayama Exp $	*/

/*
 * Copyright (c) 1999, 2000 Matthew R. Green
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 2001, 2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * UltraSPARC IOMMU support; used by both the sbus and pci code.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: iommu.c,v 1.105 2011/10/08 08:49:07 nakayama Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/proc.h>

#include <uvm/uvm.h>

#include <sys/bus.h>
#include <sparc64/dev/iommureg.h>
#include <sparc64/dev/iommuvar.h>

#include <machine/autoconf.h>
#include <machine/cpu.h>

#ifdef DEBUG
#define IDB_BUSDMA	0x1
#define IDB_IOMMU	0x2
#define IDB_INFO	0x4
#define	IDB_SYNC	0x8
int iommudebug = 0x0;
#define DPRINTF(l, s)   do { if (iommudebug & l) printf s; } while (0)
#define IOTTE_DEBUG(n)	(n)
#else
#define DPRINTF(l, s)
#define IOTTE_DEBUG(n)	0
#endif

#define iommu_strbuf_flush(i, v) do {					\
	if ((i)->sb_flush)						\
		bus_space_write_8((i)->sb_is->is_bustag, (i)->sb_sb,	\
			STRBUFREG(strbuf_pgflush), (v));		\
	} while (0)

static	int iommu_strbuf_flush_done(struct strbuf_ctl *);
static	void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
		bus_size_t, int);

/*
 * initialise the UltraSPARC IOMMU (SBUS or PCI):
 *	- allocate and setup the iotsb.
 *	- enable the IOMMU
 *	- initialise the streaming buffers (if they exist)
 *	- create a private DVMA map.
 */
void
iommu_init(char *name, struct iommu_state *is, int tsbsize, uint32_t iovabase)
{
	psize_t size;
	vaddr_t va;
	paddr_t pa;
	struct vm_page *pg;
	struct pglist pglist;

	/*
	 * Setup the iommu.
	 *
	 * The sun4u iommu is part of the SBUS or PCI controller so we will
	 * deal with it here.
	 *
	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
	 * 0xffffe000, but the starting address depends on the size of the
	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
	 * entry is 8 bytes.  The start of the map can be calculated by
	 * (0xffffe000 << (8 + is->is_tsbsize)).
	 *
	 * But sabre and hummingbird use a different scheme that seems to
	 * be hard-wired, so we read the start and size from the PROM and
	 * just use those values.
	 */
	is->is_cr = IOMMUCR_EN;
	is->is_tsbsize = tsbsize;
	if (iovabase == -1) {
		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
		is->is_dvmaend = IOTSB_VEND - 1;
	} else {
		is->is_dvmabase = iovabase;
		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize) - 1;
	}

	/*
	 * Allocate memory for I/O pagetables.  They need to be physically
	 * contiguous.
	 */

	size = PAGE_SIZE << is->is_tsbsize;
	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
		panic("iommu_init: no memory");

	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
	if (va == 0)
		panic("iommu_init: no memory");
	is->is_tsb = (int64_t *)va;

	is->is_ptsb = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist));

	/* Map the pages */
	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
		pa = VM_PAGE_TO_PHYS(pg);
		pmap_kenter_pa(va, pa | PMAP_NVC,
		    VM_PROT_READ | VM_PROT_WRITE, 0);
		va += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	memset(is->is_tsb, 0, size);

#ifdef DEBUG
	if (iommudebug & IDB_INFO)
	{
		/* Probe the iommu */

		printf("iommu cr=%llx tsb=%llx\n",
			(unsigned long long)bus_space_read_8(is->is_bustag,
				is->is_iommu,
				offsetof(struct iommureg, iommu_cr)),
			(unsigned long long)bus_space_read_8(is->is_bustag,
				is->is_iommu,
				offsetof(struct iommureg, iommu_tsb)));
		printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
			(unsigned long long)is->is_ptsb);
		delay(1000000); /* 1 s */
	}
#endif

	/*
	 * Now that all the hardware is working, we need to allocate a
	 * DVMA map.
	 */
	aprint_debug("DVMA map: %x to %x\n",
		(unsigned int)is->is_dvmabase,
		(unsigned int)is->is_dvmaend);
	aprint_debug("IOTSB: %llx to %llx\n",
		(unsigned long long)is->is_ptsb,
		(unsigned long long)(is->is_ptsb + size - 1));
	is->is_dvmamap = extent_create(name,
	    is->is_dvmabase, is->is_dvmaend,
	    0, 0, EX_NOWAIT);
	/* XXXMRG Check is_dvmamap is valid. */

	/*
	 * Set the TSB size.  The relevant bits were moved to the TSB
	 * base register in the PCIe host bridges.
	 */
	if (is->is_flags & IOMMU_TSBSIZE_IN_PTSB)
		is->is_ptsb |= is->is_tsbsize;
	else
		is->is_cr |= (is->is_tsbsize << 16);

	/*
	 * now actually start up the IOMMU
	 */
	iommu_reset(is);
}
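
/*
 * Example (hypothetical): a bus attach routine calls iommu_init() once it
 * has mapped the IOMMU registers and filled in the bus tag and handles in
 * its iommu_state.  A minimal sketch, assuming a softc `sc' with an
 * embedded iommu_state and illustrative field names:
 *
 *	sc->sc_is.is_bustag = sc->sc_bustag;
 *	sc->sc_is.is_iommu = sc->sc_iommureg;
 *	iommu_init("sbus_dvma", &sc->sc_is, 3, (uint32_t)-1);
 *
 * Passing -1 as iovabase makes iommu_init() derive the DVMA range from the
 * TSB size; bridges with hard-wired ranges pass the value read from the
 * PROM instead.
 */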

/*
 * Streaming buffers don't exist on the UltraSPARC IIi; we should have
 * detected that already and disabled them.  If not, we will notice that
 * they aren't there when the STRBUF_EN bit does not stay set.
 */
void
iommu_reset(struct iommu_state *is)
{
	int i;
	struct strbuf_ctl *sb;

	IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);

	/* Enable IOMMU in diagnostic mode */
	IOMMUREG_WRITE(is, iommu_cr, is->is_cr|IOMMUCR_DE);

	for (i = 0; i < 2; i++) {
		if ((sb = is->is_sb[i])) {

			/* Enable diagnostics mode? */
			bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
				STRBUFREG(strbuf_ctl), STRBUF_EN);

			membar_Lookaside();

			/* No streaming buffers? Disable them */
			if (bus_space_read_8(is->is_bustag,
				is->is_sb[i]->sb_sb,
				STRBUFREG(strbuf_ctl)) == 0) {
				is->is_sb[i]->sb_flush = NULL;
			} else {

				/*
				 * locate the pa of the flush buffer.
				 */
				if (pmap_extract(pmap_kernel(),
				     (vaddr_t)is->is_sb[i]->sb_flush,
				     &is->is_sb[i]->sb_flushpa) == FALSE)
					is->is_sb[i]->sb_flush = NULL;
			}
		}
	}

	if (is->is_flags & IOMMU_FLUSH_CACHE)
		IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
}

/*
 * Here are the iommu control routines.
 */
void
iommu_enter(struct strbuf_ctl *sb, vaddr_t va, int64_t pa, int flags)
{
	struct iommu_state *is = sb->sb_is;
	int strbuf = (flags & BUS_DMA_STREAMING);
	int64_t tte;

#ifdef DIAGNOSTIC
	if (va < is->is_dvmabase || va > is->is_dvmaend)
		panic("iommu_enter: va %#lx not in DVMA space", va);
#endif

	/* Is the streamcache flush really needed? */
	if (sb->sb_flush)
		iommu_strbuf_flush(sb, va);
	else
		/* If we can't flush the strbuf don't enable it. */
		strbuf = 0;

	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
		!(flags & BUS_DMA_NOCACHE), (strbuf));
#ifdef DEBUG
	tte |= (flags & 0xff000LL)<<(4*8);
#endif

	is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
	bus_space_write_8(is->is_bustag, is->is_iommu,
		IOMMUREG(iommu_flush), va);
	DPRINTF(IDB_IOMMU, ("iommu_enter: slot %d va %lx pa %lx "
		"TSB[%lx]@%p=%lx\n", (int)IOTSBSLOT(va,is->is_tsbsize),
		va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
		(void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
		(u_long)tte));
}

/*
 * Find the value of a DVMA address (debug routine).
 */
paddr_t
iommu_extract(struct iommu_state *is, vaddr_t dva)
{
	int64_t tte = 0;

	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];

	if ((tte & IOTTE_V) == 0)
		return ((paddr_t)-1L);
	return (tte & IOTTE_PAMASK);
}
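
/*
 * Example (hypothetical debug use): after loading a map, a driver could
 * sanity-check that the first DVMA segment really translates to a valid
 * physical page.  A sketch, assuming `is' and a loaded `map':
 *
 *	paddr_t pa = iommu_extract(is, map->dm_segs[0].ds_addr);
 *	if (pa == (paddr_t)-1L)
 *		printf("dva %llx has no valid IOTTE\n",
 *		    (unsigned long long)map->dm_segs[0].ds_addr);
 */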

/*
 * iommu_remove: removes mappings created by iommu_enter.
 *
 * XXX: this function needs better internal error checking.
 */
void
iommu_remove(struct iommu_state *is, vaddr_t va, size_t len)
{
	int slot;

#ifdef DIAGNOSTIC
	if (va < is->is_dvmabase || va > is->is_dvmaend)
		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
	if ((long)(va + len) < (long)va)
		panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
		      (long) va, (long) len);
	if (len & ~0xfffffff)
		panic("iommu_remove: ridiculous len 0x%lx", (u_long)len);
#endif

	va = trunc_page(va);
	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%lx]@%p\n",
		va, (u_long)IOTSBSLOT(va, is->is_tsbsize),
		&is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)]));
	while (len > 0) {
		DPRINTF(IDB_IOMMU, ("iommu_remove: clearing TSB slot %d "
			"for va %p size %lx\n",
			(int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va,
			(u_long)len));
		if (len <= PAGE_SIZE)
			len = 0;
		else
			len -= PAGE_SIZE;

#if 0
		/*
		 * XXX Zero-ing the entry would not require RMW
		 *
		 * Disabling valid bit while a page is used by a device
		 * causes an uncorrectable DMA error.
		 * Workaround to avoid an uncorrectable DMA error is
		 * eliminating the next line, but the page is mapped
		 * until the next iommu_enter call.
		 */
		is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
		membar_StoreStore();
#endif
		IOMMUREG_WRITE(is, iommu_flush, va);

		/* Flush cache if necessary. */
		slot = IOTSBSLOT(trunc_page(va), is->is_tsbsize);
		if ((is->is_flags & IOMMU_FLUSH_CACHE) &&
		    (len == 0 || (slot % 8) == 7))
			IOMMUREG_WRITE(is, iommu_cache_flush,
			    is->is_ptsb + slot * 8);

		va += PAGE_SIZE;
	}
}

static int
iommu_strbuf_flush_done(struct strbuf_ctl *sb)
{
	struct iommu_state *is = sb->sb_is;
	struct timeval cur, flushtimeout;

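/*
 * BUMPTIME() advances a timeval by `usec' microseconds, carrying any
 * overflow of tv_usec into tv_sec.
 */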
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

	if (!sb->sb_flush)
		return (0);

	/*
	 * Streaming buffer flushes:
	 *
	 *   1 Tell strbuf to flush by storing va to strbuf_pgflush.  If
	 *     we're not on a cache line boundary (64-bits):
	 *   2 Store 0 in flag
	 *   3 Store pointer to flag in flushsync
	 *   4 wait till flushsync becomes 0x1
	 *
	 * If it takes more than .5 sec, something
	 * went wrong.
	 */

	*sb->sb_flush = 0;
	bus_space_write_8(is->is_bustag, sb->sb_sb,
		STRBUFREG(strbuf_flushsync), sb->sb_flushpa);

	microtime(&flushtimeout);
	cur = flushtimeout;
	BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */

	DPRINTF(IDB_IOMMU, ("%s: flush = %lx at va = %lx pa = %lx now="
		"%"PRIx64":%"PRIx32" until = %"PRIx64":%"PRIx32"\n", __func__,
		(long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
		cur.tv_sec, cur.tv_usec,
		flushtimeout.tv_sec, flushtimeout.tv_usec));

	/* Bypass non-coherent D$ */
	while ((!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) &&
	       timercmp(&cur, &flushtimeout, <=))
		microtime(&cur);

#ifdef DIAGNOSTIC
	if (!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) {
		printf("%s: flush timeout %p, at %p\n", __func__,
			(void *)(u_long)*sb->sb_flush,
			(void *)(u_long)sb->sb_flushpa); /* panic? */
#ifdef DDB
		Debugger();
#endif
	}
#endif
	DPRINTF(IDB_IOMMU, ("%s: flushed\n", __func__));
	return (*sb->sb_flush);
}

/*
 * IOMMU DVMA operations, common to SBUS and PCI.
 */
int
iommu_dvmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
	bus_size_t buflen, struct proc *p, int flags)
{
	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
	struct iommu_state *is = sb->sb_is;
	int s;
	int err, needsflush;
	bus_size_t sgsize;
	paddr_t curaddr;
	u_long dvmaddr, sgstart, sgend, bmask;
	bus_size_t align, boundary, len;
	vaddr_t vaddr = (vaddr_t)buf;
	int seg;
	struct pmap *pmap;
	int slot;

	if (map->dm_nsegs) {
		/* Already in use?? */
#ifdef DIAGNOSTIC
		printf("iommu_dvmamap_load: map still in use\n");
#endif
		bus_dmamap_unload(t, map);
	}

	/*
	 * Make sure that on error condition we return "no valid mappings".
	 */
	map->dm_nsegs = 0;
	KASSERT(map->dm_maxsegsz <= map->_dm_maxmaxsegsz);

	if (buflen > map->_dm_size) {
		DPRINTF(IDB_BUSDMA,
		    ("iommu_dvmamap_load(): error %d > %d -- "
		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
		return (EINVAL);
	}

	sgsize = round_page(buflen + ((int)vaddr & PGOFSET));

	/*
	 * A boundary presented to bus_dmamem_alloc() takes precedence
	 * over boundary in the map.
	 */
	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
		boundary = map->_dm_boundary;
	align = max(map->dm_segs[0]._ds_align, PAGE_SIZE);

	/*
	 * If our segment size is larger than the boundary we need to
	 * split the transfer up into little pieces ourselves.
	 */
	KASSERT(is->is_dvmamap);
	s = splhigh();
	err = extent_alloc(is->is_dvmamap, sgsize, align,
	    (sgsize > boundary) ? 0 : boundary,
	    EX_NOWAIT|EX_BOUNDZERO, &dvmaddr);
	splx(s);

#ifdef DEBUG
	if (err || (dvmaddr == (u_long)-1)) {
		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
		    (int)sgsize, flags);
#ifdef DDB
		Debugger();
#endif
	}
#endif
	if (err != 0)
		return (err);

	if (dvmaddr == (u_long)-1)
		return (ENOMEM);

	/* Set the active DVMA map */
	map->_dm_dvmastart = dvmaddr;
	map->_dm_dvmasize = sgsize;

	/*
	 * Now split the DVMA range into segments, not crossing
	 * the boundary.
	 */
	seg = 0;
	sgstart = dvmaddr + (vaddr & PGOFSET);
	sgend = sgstart + buflen - 1;
	map->dm_segs[seg].ds_addr = sgstart;
	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: boundary %lx boundary - 1 %lx "
	    "~(boundary - 1) %lx\n", (long)boundary, (long)(boundary - 1),
	    (long)~(boundary - 1)));
	bmask = ~(boundary - 1);
	while ((sgstart & bmask) != (sgend & bmask) ||
	       sgend - sgstart + 1 > map->dm_maxsegsz) {
		/* Oops. We crossed a boundary or large seg. Split the xfer. */
		len = map->dm_maxsegsz;
		if ((sgstart & bmask) != (sgend & bmask))
			len = min(len, boundary - (sgstart & (boundary - 1)));
		map->dm_segs[seg].ds_len = len;
		DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
		    "seg %d start %lx size %lx\n", seg,
		    (long)map->dm_segs[seg].ds_addr,
		    (long)map->dm_segs[seg].ds_len));
		if (++seg >= map->_dm_segcnt) {
			/* Too many segments.  Fail the operation. */
			DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
			    "too many segments %d\n", seg));
			s = splhigh();
			err = extent_free(is->is_dvmamap,
			    dvmaddr, sgsize, EX_NOWAIT);
			map->_dm_dvmastart = 0;
			map->_dm_dvmasize = 0;
			splx(s);
			if (err != 0)
				printf("warning: %s: %" PRId64
				    " of DVMA space lost\n", __func__, sgsize);
			return (EFBIG);
		}
		sgstart += len;
		map->dm_segs[seg].ds_addr = sgstart;
	}
	map->dm_segs[seg].ds_len = sgend - sgstart + 1;
	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
	    "seg %d start %lx size %lx\n", seg,
	    (long)map->dm_segs[seg].ds_addr, (long)map->dm_segs[seg].ds_len));
	map->dm_nsegs = seg + 1;
	map->dm_mapsize = buflen;

	if (p != NULL)
		pmap = p->p_vmspace->vm_map.pmap;
	else
		pmap = pmap_kernel();

	needsflush = 0;
	for (; buflen > 0; ) {

		/*
		 * Get the physical address for this page.
		 */
		if (pmap_extract(pmap, (vaddr_t)vaddr, &curaddr) == FALSE) {
#ifdef DIAGNOSTIC
			printf("iommu_dvmamap_load: pmap_extract failed %lx\n", vaddr);
#endif
			bus_dmamap_unload(t, map);
			return (-1);
		}

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET);
		if (buflen < sgsize)
			sgsize = buflen;

		DPRINTF(IDB_BUSDMA,
		    ("iommu_dvmamap_load: map %p loading va %p "
		    "dva %lx at pa %lx\n",
		    map, (void *)vaddr, (long)dvmaddr,
		    (long)trunc_page(curaddr)));
		iommu_enter(sb, trunc_page(dvmaddr), trunc_page(curaddr),
		    flags | IOTTE_DEBUG(0x4000));
		needsflush = 1;

		vaddr += sgsize;
		buflen -= sgsize;

		/* Flush cache if necessary. */
		slot = IOTSBSLOT(trunc_page(dvmaddr), is->is_tsbsize);
		if ((is->is_flags & IOMMU_FLUSH_CACHE) &&
		    (buflen <= 0 || (slot % 8) == 7))
			IOMMUREG_WRITE(is, iommu_cache_flush,
			    is->is_ptsb + slot * 8);

		dvmaddr += PAGE_SIZE;
	}
	if (needsflush)
		iommu_strbuf_flush_done(sb);
#ifdef DIAGNOSTIC
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
			printf("seg %d dvmaddr %lx out of range %x - %x\n",
			    seg, (long)map->dm_segs[seg].ds_addr,
			    is->is_dvmabase, is->is_dvmaend);
#ifdef DDB
			Debugger();
#endif
		}
	}
#endif
	return (0);
}
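
/*
 * Example (hypothetical): the usual driver-side sequence that ends up in
 * the routines in this file, via the bus_dma(9) front ends of the SBus or
 * PCI tag.  A sketch, assuming a DMA tag `dt' and a buffer `buf' of `len'
 * bytes being sent to a device:
 *
 *	bus_dmamap_t dmam;
 *
 *	if (bus_dmamap_create(dt, len, 1, len, 0, BUS_DMA_NOWAIT, &dmam))
 *		return (ENOMEM);
 *	if (bus_dmamap_load(dt, dmam, buf, len, NULL, BUS_DMA_NOWAIT)) {
 *		bus_dmamap_destroy(dt, dmam);
 *		return (EIO);
 *	}
 *	bus_dmamap_sync(dt, dmam, 0, len, BUS_DMASYNC_PREWRITE);
 *	... start the DMA using dmam->dm_segs[] ...
 *	bus_dmamap_sync(dt, dmam, 0, len, BUS_DMASYNC_POSTWRITE);
 *	bus_dmamap_unload(dt, dmam);
 *	bus_dmamap_destroy(dt, dmam);
 */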


void
iommu_dvmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
	struct iommu_state *is = sb->sb_is;
	int error, s;
	bus_size_t sgsize = map->_dm_dvmasize;

	/* Flush the iommu */
#ifdef DEBUG
	if (!map->_dm_dvmastart) {
		printf("iommu_dvmamap_unload: _dm_dvmastart is zero\n");
#ifdef DDB
		Debugger();
#endif
	}
#endif
	iommu_remove(is, map->_dm_dvmastart, map->_dm_dvmasize);

	/* Flush the caches */
	bus_dmamap_unload(t->_parent, map);

	s = splhigh();
	error = extent_free(is->is_dvmamap, map->_dm_dvmastart,
		map->_dm_dvmasize, EX_NOWAIT);
	map->_dm_dvmastart = 0;
	map->_dm_dvmasize = 0;
	splx(s);
	if (error != 0)
		printf("warning: %s: %" PRId64 " of DVMA space lost\n",
		    __func__, sgsize);

	/* Clear the map */
}


int
iommu_dvmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
	bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
{
	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
	struct iommu_state *is = sb->sb_is;
	struct vm_page *pg;
	int i, j, s;
	int left;
	int err, needsflush;
	bus_size_t sgsize;
	paddr_t pa;
	bus_size_t boundary, align;
	u_long dvmaddr, sgstart, sgend, bmask;
	struct pglist *pglist;
	const int pagesz = PAGE_SIZE;
	int slot;
#ifdef DEBUG
	int npg = 0;
#endif

	if (map->dm_nsegs) {
		/* Already in use?? */
#ifdef DIAGNOSTIC
		printf("iommu_dvmamap_load_raw: map still in use\n");
#endif
		bus_dmamap_unload(t, map);
	}

	/*
	 * A boundary presented to bus_dmamem_alloc() takes precedence
	 * over boundary in the map.
	 */
	if ((boundary = segs[0]._ds_boundary) == 0)
		boundary = map->_dm_boundary;

	align = max(segs[0]._ds_align, pagesz);

	/*
	 * Make sure that on error condition we return "no valid mappings".
	 */
	map->dm_nsegs = 0;
	/* Count up the total number of pages we need */
	pa = trunc_page(segs[0].ds_addr);
	sgsize = 0;
	left = size;
	for (i = 0; left > 0 && i < nsegs; i++) {
		if (round_page(pa) != round_page(segs[i].ds_addr))
			sgsize = round_page(sgsize) +
			    (segs[i].ds_addr & PGOFSET);
		sgsize += min(left, segs[i].ds_len);
		left -= segs[i].ds_len;
		pa = segs[i].ds_addr + segs[i].ds_len;
	}
	sgsize = round_page(sgsize);

	s = splhigh();
	/*
	 * If our segment size is larger than the boundary we need to
	 * split the transfer up into little pieces ourselves.
	 */
	err = extent_alloc(is->is_dvmamap, sgsize, align,
		(sgsize > boundary) ? 0 : boundary,
		((flags & BUS_DMA_NOWAIT) == 0 ? EX_WAITOK : EX_NOWAIT) |
		EX_BOUNDZERO, &dvmaddr);
	splx(s);

	if (err != 0)
		return (err);

#ifdef DEBUG
	if (dvmaddr == (u_long)-1)
	{
		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) failed!\n",
		    (int)sgsize, flags);
#ifdef DDB
		Debugger();
#endif
	}
#endif
	if (dvmaddr == (u_long)-1)
		return (ENOMEM);

	/* Set the active DVMA map */
	map->_dm_dvmastart = dvmaddr;
	map->_dm_dvmasize = sgsize;

	bmask = ~(boundary - 1);
	if ((pglist = segs[0]._ds_mlist) == NULL) {
		u_long prev_va = 0UL, last_va = dvmaddr;
		paddr_t prev_pa = 0;
		int end = 0, offset;
		bus_size_t len = size;

		/*
		 * This map is made up of individual physical segments,
		 * probably built by _bus_dmamap_load_uio() or
		 * _bus_dmamap_load_mbuf().  Ignore the mlist and load
		 * each one individually.
		 */
		j = 0;
		needsflush = 0;
		for (i = 0; i < nsegs ; i++) {

			pa = segs[i].ds_addr;
			offset = (pa & PGOFSET);
			pa = trunc_page(pa);
			dvmaddr = trunc_page(dvmaddr);
			left = min(len, segs[i].ds_len);

			DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: converting "
				"physseg %d start %lx size %lx\n", i,
				(long)segs[i].ds_addr, (long)segs[i].ds_len));

			if ((pa == prev_pa) &&
				((offset != 0) || (end != offset))) {
				/* We can re-use this mapping */
				dvmaddr = prev_va;
			}

			sgstart = dvmaddr + offset;
			sgend = sgstart + left - 1;

			/* Are the segments virtually adjacent? */
			if ((j > 0) && (end == offset) &&
			    ((offset == 0) || (pa == prev_pa)) &&
			    (map->dm_segs[j-1].ds_len + left <=
			     map->dm_maxsegsz)) {
				/* Just append to the previous segment. */
				map->dm_segs[--j].ds_len += left;
				/* Restore sgstart for boundary check */
				sgstart = map->dm_segs[j].ds_addr;
				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
					"appending seg %d start %lx size %lx\n", j,
					(long)map->dm_segs[j].ds_addr,
					(long)map->dm_segs[j].ds_len));
			} else {
				if (j >= map->_dm_segcnt) {
					iommu_remove(is, map->_dm_dvmastart,
					    last_va - map->_dm_dvmastart);
					goto fail;
				}
				map->dm_segs[j].ds_addr = sgstart;
				map->dm_segs[j].ds_len = left;
				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
					"seg %d start %lx size %lx\n", j,
					(long)map->dm_segs[j].ds_addr,
					(long)map->dm_segs[j].ds_len));
			}
			end = (offset + left) & PGOFSET;

			/* Check for boundary issues */
			while ((sgstart & bmask) != (sgend & bmask)) {
				/* Need a new segment. */
				map->dm_segs[j].ds_len =
					boundary - (sgstart & (boundary - 1));
				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
					"seg %d start %lx size %lx\n", j,
					(long)map->dm_segs[j].ds_addr,
					(long)map->dm_segs[j].ds_len));
				if (++j >= map->_dm_segcnt) {
					iommu_remove(is, map->_dm_dvmastart,
					    last_va - map->_dm_dvmastart);
					goto fail;
				}
				sgstart += map->dm_segs[j-1].ds_len;
				map->dm_segs[j].ds_addr = sgstart;
				map->dm_segs[j].ds_len = sgend - sgstart + 1;
			}

			if (sgsize == 0)
				panic("iommu_dmamap_load_raw: size botch");

			/* Now map a series of pages. */
			while (dvmaddr <= sgend) {
				DPRINTF(IDB_BUSDMA,
					("iommu_dvmamap_load_raw: map %p "
						"loading va %lx at pa %lx\n",
						map, (long)dvmaddr,
						(long)(pa)));
				/* Enter it if we haven't before. */
				if (prev_va != dvmaddr) {
					iommu_enter(sb, prev_va = dvmaddr,
					    prev_pa = pa,
					    flags | IOTTE_DEBUG(++npg << 12));
					needsflush = 1;

					/* Flush cache if necessary. */
					slot = IOTSBSLOT(trunc_page(dvmaddr), is->is_tsbsize);
					if ((is->is_flags & IOMMU_FLUSH_CACHE) &&
					    ((dvmaddr + pagesz) > sgend || (slot % 8) == 7))
						IOMMUREG_WRITE(is, iommu_cache_flush,
						    is->is_ptsb + slot * 8);
				}

				dvmaddr += pagesz;
				pa += pagesz;
				last_va = dvmaddr;
			}

			len -= left;
			++j;
		}
		if (needsflush)
			iommu_strbuf_flush_done(sb);

		map->dm_mapsize = size;
		map->dm_nsegs = j;
#ifdef DIAGNOSTIC
		{ int seg;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
		    map->dm_segs[seg].ds_addr > is->is_dvmaend) {
			printf("seg %d dvmaddr %lx out of range %x - %x\n",
				seg, (long)map->dm_segs[seg].ds_addr,
				is->is_dvmabase, is->is_dvmaend);
#ifdef DDB
			Debugger();
#endif
		}
	}
		}
#endif
		return (0);
	}

	/*
	 * This was allocated with bus_dmamem_alloc.
	 * The pages are on a `pglist'.
	 */
	i = 0;
	sgstart = dvmaddr;
	sgend = sgstart + size - 1;
	map->dm_segs[i].ds_addr = sgstart;
	while ((sgstart & bmask) != (sgend & bmask)) {
		/* Oops.  We crossed a boundary.  Split the xfer. */
		map->dm_segs[i].ds_len = boundary - (sgstart & (boundary - 1));
		DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
			"seg %d start %lx size %lx\n", i,
			(long)map->dm_segs[i].ds_addr,
			(long)map->dm_segs[i].ds_len));
		if (++i >= map->_dm_segcnt) {
			/* Too many segments.  Fail the operation. */
			goto fail;
		}
		sgstart += map->dm_segs[i-1].ds_len;
		map->dm_segs[i].ds_addr = sgstart;
	}
	DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
			"seg %d start %lx size %lx\n", i,
			(long)map->dm_segs[i].ds_addr, (long)map->dm_segs[i].ds_len));
	map->dm_segs[i].ds_len = sgend - sgstart + 1;

	needsflush = 0;
	TAILQ_FOREACH(pg, pglist, pageq.queue) {
		if (sgsize == 0)
			panic("iommu_dmamap_load_raw: size botch");
		pa = VM_PAGE_TO_PHYS(pg);

		DPRINTF(IDB_BUSDMA,
		    ("iommu_dvmamap_load_raw: map %p loading va %lx at pa %lx\n",
		    map, (long)dvmaddr, (long)(pa)));
		iommu_enter(sb, dvmaddr, pa, flags | IOTTE_DEBUG(0x8000));
		needsflush = 1;

		sgsize -= pagesz;

		/* Flush cache if necessary. */
		slot = IOTSBSLOT(trunc_page(dvmaddr), is->is_tsbsize);
		if ((is->is_flags & IOMMU_FLUSH_CACHE) &&
		    (sgsize == 0 || (slot % 8) == 7))
			IOMMUREG_WRITE(is, iommu_cache_flush,
			    is->is_ptsb + slot * 8);

		dvmaddr += pagesz;
	}
	if (needsflush)
		iommu_strbuf_flush_done(sb);
	map->dm_mapsize = size;
	map->dm_nsegs = i+1;
#ifdef DIAGNOSTIC
	{ int seg;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
			printf("seg %d dvmaddr %lx out of range %x - %x\n",
				seg, (long)map->dm_segs[seg].ds_addr,
				is->is_dvmabase, is->is_dvmaend);
#ifdef DDB
			Debugger();
#endif
		}
	}
	}
#endif
	return (0);

fail:
	s = splhigh();
	err = extent_free(is->is_dvmamap, map->_dm_dvmastart, sgsize,
	    EX_NOWAIT);
	map->_dm_dvmastart = 0;
	map->_dm_dvmasize = 0;
	splx(s);
	if (err != 0)
		printf("warning: %s: %" PRId64 " of DVMA space lost\n",
		    __func__, sgsize);
	return (EFBIG);
}


/*
 * Flush an individual dma segment, returns non-zero if the streaming buffers
 * need flushing afterwards.
 */
static int
iommu_dvmamap_sync_range(struct strbuf_ctl *sb, vaddr_t va, bus_size_t len)
{
	vaddr_t vaend;
	struct iommu_state *is = sb->sb_is;

#ifdef DIAGNOSTIC
	if (va < is->is_dvmabase || va > is->is_dvmaend)
		panic("invalid va: %llx", (long long)va);
#endif

	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
		DPRINTF(IDB_SYNC,
			("iommu_dvmamap_sync_range: attempting to flush "
			 "non-streaming entry\n"));
		return (0);
	}

	vaend = round_page(va + len) - 1;
	va = trunc_page(va);

#ifdef DIAGNOSTIC
	if (va < is->is_dvmabase || vaend > is->is_dvmaend)
		panic("invalid va range: %llx to %llx (%x to %x)",
		    (long long)va, (long long)vaend,
		    is->is_dvmabase,
		    is->is_dvmaend);
#endif

	for ( ; va <= vaend; va += PAGE_SIZE) {
		DPRINTF(IDB_SYNC,
		    ("iommu_dvmamap_sync_range: flushing va %p\n",
		    (void *)(u_long)va));
		iommu_strbuf_flush(sb, va);
	}

	return (1);
}

static void
_iommu_dvmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
	bus_size_t len, int ops)
{
	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
	bus_size_t count;
	int i, needsflush = 0;

	if (!sb->sb_flush)
		return;

	for (i = 0; i < map->dm_nsegs; i++) {
		if (offset < map->dm_segs[i].ds_len)
			break;
		offset -= map->dm_segs[i].ds_len;
	}

	if (i == map->dm_nsegs)
		panic("%s: segment too short %llu", __func__,
		    (unsigned long long)offset);

	if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTWRITE)) {
		/* Nothing to do */;
	}

	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)) {

		for (; len > 0 && i < map->dm_nsegs; i++) {
			count = MIN(map->dm_segs[i].ds_len - offset, len);
			if (count > 0 &&
			    iommu_dvmamap_sync_range(sb,
				map->dm_segs[i].ds_addr + offset, count))
				needsflush = 1;
			offset = 0;
			len -= count;
		}
#ifdef DIAGNOSTIC
		if (i == map->dm_nsegs && len > 0)
			panic("%s: leftover %llu", __func__,
			    (unsigned long long)len);
#endif

		if (needsflush)
			iommu_strbuf_flush_done(sb);
	}
}

void
iommu_dvmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
	bus_size_t len, int ops)
{

	/* If len is 0, then there is nothing to do */
	if (len == 0)
		return;

	if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) {
		/* Flush the CPU then the IOMMU */
		bus_dmamap_sync(t->_parent, map, offset, len, ops);
		_iommu_dvmamap_sync(t, map, offset, len, ops);
	}
	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)) {
		/* Flush the IOMMU then the CPU */
		_iommu_dvmamap_sync(t, map, offset, len, ops);
		bus_dmamap_sync(t->_parent, map, offset, len, ops);
	}
}

int
iommu_dvmamem_alloc(bus_dma_tag_t t, bus_size_t size, bus_size_t alignment,
	bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
	int flags)
{

	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx bound %llx "
	   "segp %p flags %d\n", (unsigned long long)size,
	   (unsigned long long)alignment, (unsigned long long)boundary,
	   segs, flags));
	return (bus_dmamem_alloc(t->_parent, size, alignment, boundary,
	    segs, nsegs, rsegs, flags|BUS_DMA_DVMA));
}

void
iommu_dvmamem_free(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs)
{

	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
	    segs, nsegs));
	bus_dmamem_free(t->_parent, segs, nsegs);
}

/*
 * Map the DVMA mappings into the kernel pmap.
 * Check the flags to see whether we're streaming or coherent.
 */
int
iommu_dvmamem_map(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
	size_t size, void **kvap, int flags)
{
	struct vm_page *pg;
	vaddr_t va;
	bus_addr_t addr;
	struct pglist *pglist;
	int cbit;
	const uvm_flag_t kmflags =
	    (flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0;

	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: segp %p nsegs %d size %lx\n",
	    segs, nsegs, size));

	/*
	 * Allocate some space in the kernel map, and then map these pages
	 * into this space.
	 */
	size = round_page(size);
	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY | kmflags);
	if (va == 0)
		return (ENOMEM);

	*kvap = (void *)va;

	/*
	 * digest flags:
	 */
	cbit = 0;
	if (flags & BUS_DMA_COHERENT)	/* Disable vcache */
		cbit |= PMAP_NVC;
	if (flags & BUS_DMA_NOCACHE)	/* side effects */
		cbit |= PMAP_NC;

	/*
	 * Now take this and map it into the CPU.
	 */
	pglist = segs[0]._ds_mlist;
	TAILQ_FOREACH(pg, pglist, pageq.queue) {
#ifdef DIAGNOSTIC
		if (size == 0)
			panic("iommu_dvmamem_map: size botch");
#endif
		addr = VM_PAGE_TO_PHYS(pg);
		DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: "
		    "mapping va %lx at %llx\n", va, (unsigned long long)addr | cbit));
		pmap_kenter_pa(va, addr | cbit,
		    VM_PROT_READ | VM_PROT_WRITE, 0);
		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	return (0);
}
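
/*
 * Example (hypothetical): DVMA-safe memory for a shared descriptor ring is
 * obtained through the usual bus_dmamem(9) sequence, which lands in the
 * routines above.  A sketch, assuming a DMA tag `dt':
 *
 *	bus_dma_segment_t seg;
 *	int rseg;
 *	void *kva;
 *
 *	if (bus_dmamem_alloc(dt, PAGE_SIZE, PAGE_SIZE, 0, &seg, 1, &rseg,
 *	    BUS_DMA_NOWAIT) != 0)
 *		return (ENOMEM);
 *	if (bus_dmamem_map(dt, &seg, rseg, PAGE_SIZE, &kva,
 *	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT) != 0)
 *		return (ENOMEM);
 *
 * The memory is then made visible to the device with bus_dmamap_create()
 * and bus_dmamap_load_raw(), which reaches iommu_dvmamap_load_raw() above.
 */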

/*
 * Unmap DVMA mappings from kernel
 */
void
iommu_dvmamem_unmap(bus_dma_tag_t t, void *kva, size_t size)
{

	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_unmap: kvm %p size %lx\n",
	    kva, size));

#ifdef DIAGNOSTIC
	if ((u_long)kva & PGOFSET)
		panic("iommu_dvmamem_unmap");
#endif

	size = round_page(size);
	pmap_kremove((vaddr_t)kva, size);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, (vaddr_t)kva, size, UVM_KMF_VAONLY);
}