1/* $NetBSD: cia_dma.c,v 1.38 2024/02/09 22:08:31 andvar Exp $ */
2
3/*-
4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
34
35__KERNEL_RCSID(0, "$NetBSD: cia_dma.c,v 1.38 2024/02/09 22:08:31 andvar Exp $");
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/device.h>
41
42#define _ALPHA_BUS_DMA_PRIVATE
43#include <sys/bus.h>
44
45#include <uvm/uvm_extern.h>
46
47#include <dev/pci/pcireg.h>
48#include <dev/pci/pcivar.h>
49#include <alpha/pci/ciareg.h>
50#include <alpha/pci/ciavar.h>
51
52static bus_dma_tag_t cia_dma_get_tag(bus_dma_tag_t, alpha_bus_t);
53
54static int	cia_bus_dmamap_create_direct(bus_dma_tag_t, bus_size_t, int,
55		    bus_size_t, bus_size_t, int, bus_dmamap_t *);
56
57static int	cia_bus_dmamap_load_sgmap(bus_dma_tag_t, bus_dmamap_t, void *,
58		    bus_size_t, struct proc *, int);
59
60static int	cia_bus_dmamap_load_mbuf_sgmap(bus_dma_tag_t, bus_dmamap_t,
61		    struct mbuf *, int);
62
63static int	cia_bus_dmamap_load_uio_sgmap(bus_dma_tag_t, bus_dmamap_t,
64		    struct uio *, int);
65
66static int	cia_bus_dmamap_load_raw_sgmap(bus_dma_tag_t, bus_dmamap_t,
67		    bus_dma_segment_t *, int, bus_size_t, int);
68
69static void	cia_bus_dmamap_unload_sgmap(bus_dma_tag_t, bus_dmamap_t);
70
71/*
72 * Direct-mapped window: 1G at 1G
73 */
74#define	CIA_DIRECT_MAPPED_BASE	(1UL*1024*1024*1024)
75#define	CIA_DIRECT_MAPPED_SIZE	(1UL*1024*1024*1024)
76
77/*
78 * SGMAP window for ISA: 8M at 8M
79 */
80#define	CIA_SGMAP_MAPPED_LO_BASE (8UL*1024*1024)
81#define	CIA_SGMAP_MAPPED_LO_SIZE (8UL*1024*1024)
82
83/*
84 * SGMAP window for PCI: 1G at 3G
85 */
86#define	CIA_SGMAP_MAPPED_HI_BASE (3UL*1024*1024*1024)
87#define	CIA_SGMAP_MAPPED_HI_SIZE (1UL*1024*1024*1024)
88
89/* ALCOR/ALGOR2/PYXIS have a 256-byte out-bound DMA prefetch threshold. */
90#define	CIA_SGMAP_PFTHRESH	256
91
92static void	cia_tlb_invalidate(void);
93static void	cia_broken_pyxis_tlb_invalidate(void);
94
95static void	(*cia_tlb_invalidate_fn)(void);
96
97#define	CIA_TLB_INVALIDATE()	(*cia_tlb_invalidate_fn)()
98
99struct alpha_sgmap cia_pyxis_bug_sgmap;
100#define	CIA_PYXIS_BUG_BASE	(128UL*1024*1024)
101#define	CIA_PYXIS_BUG_SIZE	(2UL*1024*1024)
102
/*
 * cia_dma_shutdown:
 *
 *	Shutdown hook: restore the four DMA window register sets
 *	(Wn_BASE / Wn_MASK / Tn_BASE) that were saved in cia_dma_init(),
 *	so the firmware sees the configuration it originally set up.
 */
static void
cia_dma_shutdown(void *arg)
{
	struct cia_config *ccp = arg;
	int i;

	/*
	 * Restore the original values, to make the firmware happy.
	 * The window register sets are spaced 0x100 apart; a memory
	 * barrier follows each write so the CSR updates are ordered.
	 */
	for (i = 0; i < 4; i++) {
		REGVAL(CIA_PCI_W0BASE + (i * 0x100)) =
		    ccp->cc_saved_windows.wbase[i];
		alpha_mb();
		REGVAL(CIA_PCI_W0MASK + (i * 0x100)) =
		    ccp->cc_saved_windows.wmask[i];
		alpha_mb();
		REGVAL(CIA_PCI_T0BASE + (i * 0x100)) =
		    ccp->cc_saved_windows.tbase[i];
		alpha_mb();
	}
}
124
/*
 * cia_dma_init:
 *
 *	Set up the DMA windows and bus_dma tags for a CIA/Pyxis:
 *
 *	  - a 1G direct-mapped window at 1G (window 1),
 *	  - an 8M SGMAP-mapped window at 8M for ISA (window 0),
 *	  - optionally, a 1G SGMAP-mapped window at 3G for PCI
 *	    (window 3), only when more than 1G of RAM is present,
 *	  - on broken Pyxis revisions, a 2M SGMAP window at 128M
 *	    (window 2) used for the TLB-invalidate workaround.
 *
 *	The firmware's window configuration is saved first and
 *	re-installed at shutdown via cia_dma_shutdown().
 */
void
cia_dma_init(struct cia_config *ccp)
{
	bus_addr_t tbase;
	bus_dma_tag_t t;
	bus_dma_tag_t t_sg_hi = NULL;	/* non-NULL iff hi SGMAP window used */
	int i;

	/*
	 * Save our configuration to restore at shutdown, just
	 * in case the firmware would get cranky with us.
	 */
	for (i = 0; i < 4; i++) {
		ccp->cc_saved_windows.wbase[i] =
		    REGVAL(CIA_PCI_W0BASE + (i * 0x100));
		ccp->cc_saved_windows.wmask[i] =
		    REGVAL(CIA_PCI_W0MASK + (i * 0x100));
		ccp->cc_saved_windows.tbase[i] =
		    REGVAL(CIA_PCI_T0BASE + (i * 0x100));
	}
	shutdownhook_establish(cia_dma_shutdown, ccp);

	/*
	 * If we have more than 1GB of RAM, then set up an sgmap-mapped
	 * DMA window for PCI.  This is better than using the ISA window,
	 * which is pretty small and PCI devices could starve it.
	 *
	 * N.B. avail_end is "last-usable PFN + 1".
	 */
	if (uvm_physseg_get_avail_end(uvm_physseg_get_last()) >
	    atop(CIA_DIRECT_MAPPED_SIZE)) {
		t = t_sg_hi = &ccp->cc_dmat_sgmap_hi;
		t->_cookie = ccp;
		t->_wbase = CIA_SGMAP_MAPPED_HI_BASE;
		t->_wsize = CIA_SGMAP_MAPPED_HI_SIZE;
		t->_next_window = NULL;
		t->_boundary = 0;
		t->_sgmap = &ccp->cc_sgmap_hi;
		t->_pfthresh = CIA_SGMAP_PFTHRESH;
		t->_get_tag = cia_dma_get_tag;
		t->_dmamap_create = alpha_sgmap_dmamap_create;
		t->_dmamap_destroy = alpha_sgmap_dmamap_destroy;
		t->_dmamap_load = cia_bus_dmamap_load_sgmap;
		t->_dmamap_load_mbuf = cia_bus_dmamap_load_mbuf_sgmap;
		t->_dmamap_load_uio = cia_bus_dmamap_load_uio_sgmap;
		t->_dmamap_load_raw = cia_bus_dmamap_load_raw_sgmap;
		t->_dmamap_unload = cia_bus_dmamap_unload_sgmap;
		t->_dmamap_sync = _bus_dmamap_sync;

		t->_dmamem_alloc = _bus_dmamem_alloc;
		t->_dmamem_free = _bus_dmamem_free;
		t->_dmamem_map = _bus_dmamem_map;
		t->_dmamem_unmap = _bus_dmamem_unmap;
		t->_dmamem_mmap = _bus_dmamem_mmap;
	}

	/*
	 * Initialize the DMA tag used for direct-mapped DMA.  If the
	 * hi SGMAP window exists, it is chained as the fall-back window
	 * for pages the direct-mapped window cannot reach.
	 */
	t = &ccp->cc_dmat_direct;
	t->_cookie = ccp;
	t->_wbase = CIA_DIRECT_MAPPED_BASE;
	t->_wsize = CIA_DIRECT_MAPPED_SIZE;
	t->_next_window = t_sg_hi;
	t->_boundary = 0;
	t->_sgmap = NULL;
	t->_get_tag = cia_dma_get_tag;
	t->_dmamap_create = cia_bus_dmamap_create_direct;
	t->_dmamap_destroy = _bus_dmamap_destroy;
	t->_dmamap_load = _bus_dmamap_load_direct;
	t->_dmamap_load_mbuf = _bus_dmamap_load_mbuf_direct;
	t->_dmamap_load_uio = _bus_dmamap_load_uio_direct;
	t->_dmamap_load_raw = _bus_dmamap_load_raw_direct;
	t->_dmamap_unload = _bus_dmamap_unload;
	t->_dmamap_sync = _bus_dmamap_sync;

	t->_dmamem_alloc = _bus_dmamem_alloc;
	t->_dmamem_free = _bus_dmamem_free;
	t->_dmamem_map = _bus_dmamem_map;
	t->_dmamem_unmap = _bus_dmamem_unmap;
	t->_dmamem_mmap = _bus_dmamem_mmap;

	/*
	 * Initialize the DMA tag used for sgmap-mapped ISA DMA.
	 */
	t = &ccp->cc_dmat_sgmap_lo;
	t->_cookie = ccp;
	t->_wbase = CIA_SGMAP_MAPPED_LO_BASE;
	t->_wsize = CIA_SGMAP_MAPPED_LO_SIZE;
	t->_next_window = NULL;
	t->_boundary = 0;
	t->_sgmap = &ccp->cc_sgmap_lo;
	t->_pfthresh = CIA_SGMAP_PFTHRESH;
	t->_get_tag = cia_dma_get_tag;
	t->_dmamap_create = alpha_sgmap_dmamap_create;
	t->_dmamap_destroy = alpha_sgmap_dmamap_destroy;
	t->_dmamap_load = cia_bus_dmamap_load_sgmap;
	t->_dmamap_load_mbuf = cia_bus_dmamap_load_mbuf_sgmap;
	t->_dmamap_load_uio = cia_bus_dmamap_load_uio_sgmap;
	t->_dmamap_load_raw = cia_bus_dmamap_load_raw_sgmap;
	t->_dmamap_unload = cia_bus_dmamap_unload_sgmap;
	t->_dmamap_sync = _bus_dmamap_sync;

	t->_dmamem_alloc = _bus_dmamem_alloc;
	t->_dmamem_free = _bus_dmamem_free;
	t->_dmamem_map = _bus_dmamem_map;
	t->_dmamem_unmap = _bus_dmamem_unmap;
	t->_dmamem_mmap = _bus_dmamem_mmap;

	/*
	 * The firmware will have set up window 1 as a 1G direct-mapped
	 * DMA window beginning at 1G.  While it's pretty safe to assume
	 * this is the case, we'll go ahead and program the registers
	 * as we expect as a belt-and-suspenders measure.
	 */
	REGVAL(CIA_PCI_W1BASE) = CIA_DIRECT_MAPPED_BASE | CIA_PCI_WnBASE_W_EN;
	alpha_mb();
	REGVAL(CIA_PCI_W1MASK) = CIA_PCI_WnMASK_1G;
	alpha_mb();
	REGVAL(CIA_PCI_T1BASE) = 0;
	alpha_mb();

	/*
	 * Initialize the SGMAP(s).  Must align page table to at least 32k
	 * (hardware bug?).
	 */
	alpha_sgmap_init(t, &ccp->cc_sgmap_lo, "cia_sgmap_lo",
	    CIA_SGMAP_MAPPED_LO_BASE, 0, CIA_SGMAP_MAPPED_LO_SIZE,
	    sizeof(uint64_t), NULL, (32*1024));
	if (t_sg_hi != NULL) {
		alpha_sgmap_init(t_sg_hi, &ccp->cc_sgmap_hi, "cia_sgmap_hi",
		    CIA_SGMAP_MAPPED_HI_BASE, 0, CIA_SGMAP_MAPPED_HI_SIZE,
		    sizeof(uint64_t), NULL, (32*1024));
	}

	/*
	 * Set up window 0 as an 8MB SGMAP-mapped window
	 * starting at 8MB.
	 */
	REGVAL(CIA_PCI_W0BASE) = CIA_SGMAP_MAPPED_LO_BASE |
	    CIA_PCI_WnBASE_SG_EN | CIA_PCI_WnBASE_W_EN;
	alpha_mb();

	REGVAL(CIA_PCI_W0MASK) = CIA_PCI_WnMASK_8M;
	alpha_mb();

	/* Page table physical address must fit in the Tn_BASE field. */
	tbase = ccp->cc_sgmap_lo.aps_ptpa >> CIA_PCI_TnBASE_SHIFT;
	if ((tbase & CIA_PCI_TnBASE_MASK) != tbase)
		panic("cia_dma_init: bad page table address");
	REGVAL(CIA_PCI_T0BASE) = tbase;
	alpha_mb();

	/*
	 * (Maybe) set up window 3 as a 1G SGMAP-mapped window starting
	 * at 3G.
	 */
	if (t_sg_hi != NULL) {
		REGVAL(CIA_PCI_W3BASE) = CIA_SGMAP_MAPPED_HI_BASE |
		    CIA_PCI_WnBASE_SG_EN | CIA_PCI_WnBASE_W_EN;
		alpha_mb();

		REGVAL(CIA_PCI_W3MASK) = CIA_PCI_WnMASK_1G;
		alpha_mb();

		tbase = ccp->cc_sgmap_hi.aps_ptpa >> CIA_PCI_TnBASE_SHIFT;
		if ((tbase & CIA_PCI_TnBASE_MASK) != tbase)
			panic("cia_dma_init: bad page table address");
		REGVAL(CIA_PCI_T3BASE) = tbase;
		alpha_mb();
	} else {
		/* No hi SGMAP window; make sure window 3 is disabled. */
		REGVAL(CIA_PCI_W3BASE) = 0;
		alpha_mb();
	}

	/*
	 * Pass 1 and 2 (i.e. revision <= 1) of the Pyxis have a
	 * broken scatter/gather TLB; it cannot be invalidated.  To
	 * work around this problem, we configure window 2 as an SG
	 * 2M window at 128M, which we use in DMA loopback mode to
	 * read a spill page.  This works by causing TLB misses,
	 * causing the old entries to be purged to make room for
	 * the new entries coming in for the spill page.
	 */
	if ((ccp->cc_flags & CCF_ISPYXIS) != 0 && ccp->cc_rev <= 1) {
		uint64_t *page_table;

		cia_tlb_invalidate_fn =
		    cia_broken_pyxis_tlb_invalidate;

		alpha_sgmap_init(t, &cia_pyxis_bug_sgmap,
		    "pyxis_bug_sgmap", CIA_PYXIS_BUG_BASE, 0,
		    CIA_PYXIS_BUG_SIZE, sizeof(uint64_t), NULL,
		    (32*1024));

		REGVAL(CIA_PCI_W2BASE) = CIA_PYXIS_BUG_BASE |
		    CIA_PCI_WnBASE_SG_EN | CIA_PCI_WnBASE_W_EN;
		alpha_mb();

		REGVAL(CIA_PCI_W2MASK) = CIA_PCI_WnMASK_2M;
		alpha_mb();

		tbase = cia_pyxis_bug_sgmap.aps_ptpa >>
		    CIA_PCI_TnBASE_SHIFT;
		if ((tbase & CIA_PCI_TnBASE_MASK) != tbase)
			panic("cia_dma_init: bad page table address");
		REGVAL(CIA_PCI_T2BASE) = tbase;
		alpha_mb();

		/*
		 * Initialize the page table to point at the spill
		 * page.  Leave the last entry invalid.
		 */
		pci_sgmap_pte64_init_spill_page_pte();
		for (i = 0, page_table = cia_pyxis_bug_sgmap.aps_pt;
		     i < (CIA_PYXIS_BUG_SIZE / PAGE_SIZE) - 1; i++) {
			page_table[i] =
			    pci_sgmap_pte64_prefetch_spill_page_pte;
		}
		alpha_mb();
	} else {
		/* Healthy chip: disable window 2, use the normal TBIA. */
		REGVAL(CIA_PCI_W2BASE) = 0;
		alpha_mb();

		cia_tlb_invalidate_fn = cia_tlb_invalidate;
	}

	/* Flush any stale S/G TLB entries before DMA begins. */
	CIA_TLB_INVALIDATE();
}
353
354/*
355 * Return the bus dma tag to be used for the specified bus type.
356 * INTERNAL USE ONLY!
357 */
358static bus_dma_tag_t
359cia_dma_get_tag(bus_dma_tag_t t, alpha_bus_t bustype)
360{
361	struct cia_config *ccp = t->_cookie;
362
363	switch (bustype) {
364	case ALPHA_BUS_PCI:
365	case ALPHA_BUS_EISA:
366		/*
367		 * Regardless if how much memory is installed,
368		 * start with the direct-mapped window.  It will
369		 * fall back to the SGMAP window if we encounter a
370		 * page that is out of range.
371		 */
372		return (&ccp->cc_dmat_direct);
373
374	case ALPHA_BUS_ISA:
375		/*
376		 * ISA doesn't have enough address bits to use
377		 * the direct-mapped DMA window, so we must use
378		 * SGMAPs.
379		 */
380		return (&ccp->cc_dmat_sgmap_lo);
381
382	default:
383		panic("cia_dma_get_tag: shouldn't be here, really...");
384	}
385}
386
387/*
388 * Create a CIA direct-mapped DMA map.
389 */
390static int
391cia_bus_dmamap_create_direct(
392	bus_dma_tag_t t,
393	bus_size_t size,
394	int nsegments,
395	bus_size_t maxsegsz,
396	bus_size_t boundary,
397	int flags,
398	bus_dmamap_t *dmamp)
399{
400	struct cia_config *ccp = t->_cookie;
401	bus_dmamap_t map;
402	int error;
403
404	error = _bus_dmamap_create(t, size, nsegments, maxsegsz,
405	    boundary, flags, dmamp);
406	if (error)
407		return (error);
408
409	map = *dmamp;
410
411	if ((ccp->cc_flags & CCF_PYXISBUG) != 0 &&
412	    map->_dm_segcnt > 1) {
413		/*
414		 * We have a Pyxis with the DMA page crossing bug, make
415		 * sure we don't coalesce adjacent DMA segments.
416		 *
417		 * NOTE: We can only do this if the max segment count
418		 * is greater than 1.  This is because many network
419		 * drivers allocate large contiguous blocks of memory
420		 * for control data structures, even though they won't
421		 * do any single DMA that crosses a page boundary.
422		 *	-- thorpej@NetBSD.org, 2/5/2000
423		 */
424		map->_dm_flags |= DMAMAP_NO_COALESCE;
425	}
426
427	return (0);
428}
429
430/*
431 * Load a CIA SGMAP-mapped DMA map with a linear buffer.
432 */
433static int
434cia_bus_dmamap_load_sgmap(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
435    bus_size_t buflen, struct proc *p, int flags)
436{
437	int error;
438
439	error = pci_sgmap_pte64_load(t, map, buf, buflen, p, flags,
440	    t->_sgmap);
441	if (error == 0)
442		CIA_TLB_INVALIDATE();
443
444	return (error);
445}
446
447/*
448 * Load a CIA SGMAP-mapped DMA map with an mbuf chain.
449 */
450static int
451cia_bus_dmamap_load_mbuf_sgmap(bus_dma_tag_t t, bus_dmamap_t map,
452    struct mbuf *m, int flags)
453{
454	int error;
455
456	error = pci_sgmap_pte64_load_mbuf(t, map, m, flags, t->_sgmap);
457	if (error == 0)
458		CIA_TLB_INVALIDATE();
459
460	return (error);
461}
462
463/*
464 * Load a CIA SGMAP-mapped DMA map with a uio.
465 */
466static int
467cia_bus_dmamap_load_uio_sgmap(bus_dma_tag_t t, bus_dmamap_t map,
468    struct uio *uio, int flags)
469{
470	int error;
471
472	error = pci_sgmap_pte64_load_uio(t, map, uio, flags, t->_sgmap);
473	if (error == 0)
474		CIA_TLB_INVALIDATE();
475
476	return (error);
477}
478
479/*
480 * Load a CIA SGMAP-mapped DMA map with raw memory.
481 */
482static int
483cia_bus_dmamap_load_raw_sgmap(bus_dma_tag_t t, bus_dmamap_t map,
484    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
485{
486	int error;
487
488	error = pci_sgmap_pte64_load_raw(t, map, segs, nsegs, size, flags,
489	    t->_sgmap);
490	if (error == 0)
491		CIA_TLB_INVALIDATE();
492
493	return (error);
494}
495
496/*
497 * Unload a CIA DMA map.
498 */
499static void
500cia_bus_dmamap_unload_sgmap(bus_dma_tag_t t, bus_dmamap_t map)
501{
502
503	/*
504	 * Invalidate any SGMAP page table entries used by this
505	 * mapping.
506	 */
507	pci_sgmap_pte64_unload(t, map, t->_sgmap);
508	CIA_TLB_INVALIDATE();
509
510	/*
511	 * Do the generic bits of the unload.
512	 */
513	_bus_dmamap_unload_common(t, map);
514}
515
516/*
517 * Flush the CIA scatter/gather TLB.
518 */
/*
 * Flush the CIA scatter/gather TLB.
 *
 * A single write of CIA_PCI_TBIA_ALL to the TBIA register invalidates
 * all S/G TLB entries; the write is bracketed by memory barriers so it
 * is ordered with respect to surrounding PTE updates.
 */
static void
cia_tlb_invalidate(void)
{

	alpha_mb();
	REGVAL(CIA_PCI_TBIA) = CIA_PCI_TBIA_ALL;
	alpha_mb();
}
527
528/*
529 * Flush the scatter/gather TLB on broken Pyxis chips.
530 */
/*
 * Flush the scatter/gather TLB on broken Pyxis chips.
 *
 * Pass 1/2 Pyxis cannot invalidate its S/G TLB directly.  Instead we
 * enable PCI loopback mode and read through the 2M SGMAP window set up
 * at CIA_PYXIS_BUG_BASE (see cia_dma_init()), forcing TLB misses that
 * evict the stale entries.  Runs at splhigh() since loopback mode must
 * not be interrupted by normal PCI activity.
 */
static void
cia_broken_pyxis_tlb_invalidate(void)
{
	uint32_t ctrl;
	int i, s;

	s = splhigh();

	/*
	 * Put the Pyxis into PCI loopback mode.
	 */
	alpha_mb();
	ctrl = REGVAL(CIA_CSR_CTRL);
	REGVAL(CIA_CSR_CTRL) = ctrl | CTRL_PCI_LOOP_EN;
	alpha_mb();

	/*
	 * Now, read from PCI dense memory space at offset 128M (our
	 * target window base), skipping 64k on each read.  This forces
	 * S/G TLB misses.
	 *
	 * XXX Looks like the TLB entries are `not quite LRU'.  We need
	 * XXX to read more times than there are actual tags!
	 */
	for (i = 0; i < CIA_TLB_NTAGS + 4; i++) {
		volatile uint64_t dummy;
		dummy = *((volatile uint64_t *)
		    ALPHA_PHYS_TO_K0SEG(CIA_PCI_DENSE + CIA_PYXIS_BUG_BASE +
		    (i * 65536)));
		__USE(dummy);
	}

	/*
	 * Restore normal PCI operation.
	 */
	alpha_mb();
	REGVAL(CIA_CSR_CTRL) = ctrl;
	alpha_mb();

	splx(s);
}
572