1/* $NetBSD: cia_dma.c,v 1.38 2024/02/09 22:08:31 andvar Exp $ */ 2 3/*- 4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 34 35__KERNEL_RCSID(0, "$NetBSD: cia_dma.c,v 1.38 2024/02/09 22:08:31 andvar Exp $"); 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/device.h> 41 42#define _ALPHA_BUS_DMA_PRIVATE 43#include <sys/bus.h> 44 45#include <uvm/uvm_extern.h> 46 47#include <dev/pci/pcireg.h> 48#include <dev/pci/pcivar.h> 49#include <alpha/pci/ciareg.h> 50#include <alpha/pci/ciavar.h> 51 52static bus_dma_tag_t cia_dma_get_tag(bus_dma_tag_t, alpha_bus_t); 53 54static int cia_bus_dmamap_create_direct(bus_dma_tag_t, bus_size_t, int, 55 bus_size_t, bus_size_t, int, bus_dmamap_t *); 56 57static int cia_bus_dmamap_load_sgmap(bus_dma_tag_t, bus_dmamap_t, void *, 58 bus_size_t, struct proc *, int); 59 60static int cia_bus_dmamap_load_mbuf_sgmap(bus_dma_tag_t, bus_dmamap_t, 61 struct mbuf *, int); 62 63static int cia_bus_dmamap_load_uio_sgmap(bus_dma_tag_t, bus_dmamap_t, 64 struct uio *, int); 65 66static int cia_bus_dmamap_load_raw_sgmap(bus_dma_tag_t, bus_dmamap_t, 67 bus_dma_segment_t *, int, bus_size_t, int); 68 69static void cia_bus_dmamap_unload_sgmap(bus_dma_tag_t, bus_dmamap_t); 70 71/* 72 * Direct-mapped window: 1G at 1G 73 */ 74#define CIA_DIRECT_MAPPED_BASE (1UL*1024*1024*1024) 75#define CIA_DIRECT_MAPPED_SIZE (1UL*1024*1024*1024) 76 77/* 78 * SGMAP window for ISA: 8M at 8M 79 */ 80#define CIA_SGMAP_MAPPED_LO_BASE (8UL*1024*1024) 81#define CIA_SGMAP_MAPPED_LO_SIZE (8UL*1024*1024) 82 83/* 84 * SGMAP window for PCI: 1G at 3G 85 */ 86#define CIA_SGMAP_MAPPED_HI_BASE (3UL*1024*1024*1024) 87#define CIA_SGMAP_MAPPED_HI_SIZE (1UL*1024*1024*1024) 88 89/* ALCOR/ALGOR2/PYXIS have a 256-byte out-bound DMA prefetch threshold. */ 90#define CIA_SGMAP_PFTHRESH 256 91 92static void cia_tlb_invalidate(void); 93static void cia_broken_pyxis_tlb_invalidate(void); 94 95static void (*cia_tlb_invalidate_fn)(void); 96 97#define CIA_TLB_INVALIDATE() (*cia_tlb_invalidate_fn)() 98 99struct alpha_sgmap cia_pyxis_bug_sgmap; 100#define CIA_PYXIS_BUG_BASE (128UL*1024*1024) 101#define CIA_PYXIS_BUG_SIZE (2UL*1024*1024) 102 103static void 104cia_dma_shutdown(void *arg) 105{ 106 struct cia_config *ccp = arg; 107 int i; 108 109 /* 110 * Restore the original values, to make the firmware happy. 111 */ 112 for (i = 0; i < 4; i++) { 113 REGVAL(CIA_PCI_W0BASE + (i * 0x100)) = 114 ccp->cc_saved_windows.wbase[i]; 115 alpha_mb(); 116 REGVAL(CIA_PCI_W0MASK + (i * 0x100)) = 117 ccp->cc_saved_windows.wmask[i]; 118 alpha_mb(); 119 REGVAL(CIA_PCI_T0BASE + (i * 0x100)) = 120 ccp->cc_saved_windows.tbase[i]; 121 alpha_mb(); 122 } 123} 124 125void 126cia_dma_init(struct cia_config *ccp) 127{ 128 bus_addr_t tbase; 129 bus_dma_tag_t t; 130 bus_dma_tag_t t_sg_hi = NULL; 131 int i; 132 133 /* 134 * Save our configuration to restore at shutdown, just 135 * in case the firmware would get cranky with us. 136 */ 137 for (i = 0; i < 4; i++) { 138 ccp->cc_saved_windows.wbase[i] = 139 REGVAL(CIA_PCI_W0BASE + (i * 0x100)); 140 ccp->cc_saved_windows.wmask[i] = 141 REGVAL(CIA_PCI_W0MASK + (i * 0x100)); 142 ccp->cc_saved_windows.tbase[i] = 143 REGVAL(CIA_PCI_T0BASE + (i * 0x100)); 144 } 145 shutdownhook_establish(cia_dma_shutdown, ccp); 146 147 /* 148 * If we have more than 1GB of RAM, then set up an sgmap-mapped 149 * DMA window for PCI. This is better than using the ISA window, 150 * which is pretty small and PCI devices could starve it. 151 * 152 * N.B. avail_end is "last-usable PFN + 1". 153 */ 154 if (uvm_physseg_get_avail_end(uvm_physseg_get_last()) > 155 atop(CIA_DIRECT_MAPPED_SIZE)) { 156 t = t_sg_hi = &ccp->cc_dmat_sgmap_hi; 157 t->_cookie = ccp; 158 t->_wbase = CIA_SGMAP_MAPPED_HI_BASE; 159 t->_wsize = CIA_SGMAP_MAPPED_HI_SIZE; 160 t->_next_window = NULL; 161 t->_boundary = 0; 162 t->_sgmap = &ccp->cc_sgmap_hi; 163 t->_pfthresh = CIA_SGMAP_PFTHRESH; 164 t->_get_tag = cia_dma_get_tag; 165 t->_dmamap_create = alpha_sgmap_dmamap_create; 166 t->_dmamap_destroy = alpha_sgmap_dmamap_destroy; 167 t->_dmamap_load = cia_bus_dmamap_load_sgmap; 168 t->_dmamap_load_mbuf = cia_bus_dmamap_load_mbuf_sgmap; 169 t->_dmamap_load_uio = cia_bus_dmamap_load_uio_sgmap; 170 t->_dmamap_load_raw = cia_bus_dmamap_load_raw_sgmap; 171 t->_dmamap_unload = cia_bus_dmamap_unload_sgmap; 172 t->_dmamap_sync = _bus_dmamap_sync; 173 174 t->_dmamem_alloc = _bus_dmamem_alloc; 175 t->_dmamem_free = _bus_dmamem_free; 176 t->_dmamem_map = _bus_dmamem_map; 177 t->_dmamem_unmap = _bus_dmamem_unmap; 178 t->_dmamem_mmap = _bus_dmamem_mmap; 179 } 180 181 /* 182 * Initialize the DMA tag used for direct-mapped DMA. 183 */ 184 t = &ccp->cc_dmat_direct; 185 t->_cookie = ccp; 186 t->_wbase = CIA_DIRECT_MAPPED_BASE; 187 t->_wsize = CIA_DIRECT_MAPPED_SIZE; 188 t->_next_window = t_sg_hi; 189 t->_boundary = 0; 190 t->_sgmap = NULL; 191 t->_get_tag = cia_dma_get_tag; 192 t->_dmamap_create = cia_bus_dmamap_create_direct; 193 t->_dmamap_destroy = _bus_dmamap_destroy; 194 t->_dmamap_load = _bus_dmamap_load_direct; 195 t->_dmamap_load_mbuf = _bus_dmamap_load_mbuf_direct; 196 t->_dmamap_load_uio = _bus_dmamap_load_uio_direct; 197 t->_dmamap_load_raw = _bus_dmamap_load_raw_direct; 198 t->_dmamap_unload = _bus_dmamap_unload; 199 t->_dmamap_sync = _bus_dmamap_sync; 200 201 t->_dmamem_alloc = _bus_dmamem_alloc; 202 t->_dmamem_free = _bus_dmamem_free; 203 t->_dmamem_map = _bus_dmamem_map; 204 t->_dmamem_unmap = _bus_dmamem_unmap; 205 t->_dmamem_mmap = _bus_dmamem_mmap; 206 207 /* 208 * Initialize the DMA tag used for sgmap-mapped ISA DMA. 209 */ 210 t = &ccp->cc_dmat_sgmap_lo; 211 t->_cookie = ccp; 212 t->_wbase = CIA_SGMAP_MAPPED_LO_BASE; 213 t->_wsize = CIA_SGMAP_MAPPED_LO_SIZE; 214 t->_next_window = NULL; 215 t->_boundary = 0; 216 t->_sgmap = &ccp->cc_sgmap_lo; 217 t->_pfthresh = CIA_SGMAP_PFTHRESH; 218 t->_get_tag = cia_dma_get_tag; 219 t->_dmamap_create = alpha_sgmap_dmamap_create; 220 t->_dmamap_destroy = alpha_sgmap_dmamap_destroy; 221 t->_dmamap_load = cia_bus_dmamap_load_sgmap; 222 t->_dmamap_load_mbuf = cia_bus_dmamap_load_mbuf_sgmap; 223 t->_dmamap_load_uio = cia_bus_dmamap_load_uio_sgmap; 224 t->_dmamap_load_raw = cia_bus_dmamap_load_raw_sgmap; 225 t->_dmamap_unload = cia_bus_dmamap_unload_sgmap; 226 t->_dmamap_sync = _bus_dmamap_sync; 227 228 t->_dmamem_alloc = _bus_dmamem_alloc; 229 t->_dmamem_free = _bus_dmamem_free; 230 t->_dmamem_map = _bus_dmamem_map; 231 t->_dmamem_unmap = _bus_dmamem_unmap; 232 t->_dmamem_mmap = _bus_dmamem_mmap; 233 234 /* 235 * The firmware will have set up window 1 as a 1G dirct-mapped 236 * DMA window beginning at 1G. While it's pretty safe to assume 237 * this is the case, we'll go ahead and program the registers 238 * as we expect as a belt-and-suspenders measure. 239 */ 240 REGVAL(CIA_PCI_W1BASE) = CIA_DIRECT_MAPPED_BASE | CIA_PCI_WnBASE_W_EN; 241 alpha_mb(); 242 REGVAL(CIA_PCI_W1MASK) = CIA_PCI_WnMASK_1G; 243 alpha_mb(); 244 REGVAL(CIA_PCI_T1BASE) = 0; 245 alpha_mb(); 246 247 /* 248 * Initialize the SGMAP(s). Must align page table to at least 32k 249 * (hardware bug?). 250 */ 251 alpha_sgmap_init(t, &ccp->cc_sgmap_lo, "cia_sgmap_lo", 252 CIA_SGMAP_MAPPED_LO_BASE, 0, CIA_SGMAP_MAPPED_LO_SIZE, 253 sizeof(uint64_t), NULL, (32*1024)); 254 if (t_sg_hi != NULL) { 255 alpha_sgmap_init(t_sg_hi, &ccp->cc_sgmap_hi, "cia_sgmap_hi", 256 CIA_SGMAP_MAPPED_HI_BASE, 0, CIA_SGMAP_MAPPED_HI_SIZE, 257 sizeof(uint64_t), NULL, (32*1024)); 258 } 259 260 /* 261 * Set up window 0 as an 8MB SGMAP-mapped window 262 * starting at 8MB. 263 */ 264 REGVAL(CIA_PCI_W0BASE) = CIA_SGMAP_MAPPED_LO_BASE | 265 CIA_PCI_WnBASE_SG_EN | CIA_PCI_WnBASE_W_EN; 266 alpha_mb(); 267 268 REGVAL(CIA_PCI_W0MASK) = CIA_PCI_WnMASK_8M; 269 alpha_mb(); 270 271 tbase = ccp->cc_sgmap_lo.aps_ptpa >> CIA_PCI_TnBASE_SHIFT; 272 if ((tbase & CIA_PCI_TnBASE_MASK) != tbase) 273 panic("cia_dma_init: bad page table address"); 274 REGVAL(CIA_PCI_T0BASE) = tbase; 275 alpha_mb(); 276 277 /* 278 * (Maybe) set up window 3 as a 1G SGMAP-mapped window starting 279 * at 3G. 280 */ 281 if (t_sg_hi != NULL) { 282 REGVAL(CIA_PCI_W3BASE) = CIA_SGMAP_MAPPED_HI_BASE | 283 CIA_PCI_WnBASE_SG_EN | CIA_PCI_WnBASE_W_EN; 284 alpha_mb(); 285 286 REGVAL(CIA_PCI_W3MASK) = CIA_PCI_WnMASK_1G; 287 alpha_mb(); 288 289 tbase = ccp->cc_sgmap_hi.aps_ptpa >> CIA_PCI_TnBASE_SHIFT; 290 if ((tbase & CIA_PCI_TnBASE_MASK) != tbase) 291 panic("cia_dma_init: bad page table address"); 292 REGVAL(CIA_PCI_T3BASE) = tbase; 293 alpha_mb(); 294 } else { 295 REGVAL(CIA_PCI_W3BASE) = 0; 296 alpha_mb(); 297 } 298 299 /* 300 * Pass 1 and 2 (i.e. revision <= 1) of the Pyxis have a 301 * broken scatter/gather TLB; it cannot be invalidated. To 302 * work around this problem, we configure window 2 as an SG 303 * 2M window at 128M, which we use in DMA loopback mode to 304 * read a spill page. This works by causing TLB misses, 305 * causing the old entries to be purged to make room for 306 * the new entries coming in for the spill page. 307 */ 308 if ((ccp->cc_flags & CCF_ISPYXIS) != 0 && ccp->cc_rev <= 1) { 309 uint64_t *page_table; 310 311 cia_tlb_invalidate_fn = 312 cia_broken_pyxis_tlb_invalidate; 313 314 alpha_sgmap_init(t, &cia_pyxis_bug_sgmap, 315 "pyxis_bug_sgmap", CIA_PYXIS_BUG_BASE, 0, 316 CIA_PYXIS_BUG_SIZE, sizeof(uint64_t), NULL, 317 (32*1024)); 318 319 REGVAL(CIA_PCI_W2BASE) = CIA_PYXIS_BUG_BASE | 320 CIA_PCI_WnBASE_SG_EN | CIA_PCI_WnBASE_W_EN; 321 alpha_mb(); 322 323 REGVAL(CIA_PCI_W2MASK) = CIA_PCI_WnMASK_2M; 324 alpha_mb(); 325 326 tbase = cia_pyxis_bug_sgmap.aps_ptpa >> 327 CIA_PCI_TnBASE_SHIFT; 328 if ((tbase & CIA_PCI_TnBASE_MASK) != tbase) 329 panic("cia_dma_init: bad page table address"); 330 REGVAL(CIA_PCI_T2BASE) = tbase; 331 alpha_mb(); 332 333 /* 334 * Initialize the page table to point at the spill 335 * page. Leave the last entry invalid. 336 */ 337 pci_sgmap_pte64_init_spill_page_pte(); 338 for (i = 0, page_table = cia_pyxis_bug_sgmap.aps_pt; 339 i < (CIA_PYXIS_BUG_SIZE / PAGE_SIZE) - 1; i++) { 340 page_table[i] = 341 pci_sgmap_pte64_prefetch_spill_page_pte; 342 } 343 alpha_mb(); 344 } else { 345 REGVAL(CIA_PCI_W2BASE) = 0; 346 alpha_mb(); 347 348 cia_tlb_invalidate_fn = cia_tlb_invalidate; 349 } 350 351 CIA_TLB_INVALIDATE(); 352} 353 354/* 355 * Return the bus dma tag to be used for the specified bus type. 356 * INTERNAL USE ONLY! 357 */ 358static bus_dma_tag_t 359cia_dma_get_tag(bus_dma_tag_t t, alpha_bus_t bustype) 360{ 361 struct cia_config *ccp = t->_cookie; 362 363 switch (bustype) { 364 case ALPHA_BUS_PCI: 365 case ALPHA_BUS_EISA: 366 /* 367 * Regardless if how much memory is installed, 368 * start with the direct-mapped window. It will 369 * fall back to the SGMAP window if we encounter a 370 * page that is out of range. 371 */ 372 return (&ccp->cc_dmat_direct); 373 374 case ALPHA_BUS_ISA: 375 /* 376 * ISA doesn't have enough address bits to use 377 * the direct-mapped DMA window, so we must use 378 * SGMAPs. 379 */ 380 return (&ccp->cc_dmat_sgmap_lo); 381 382 default: 383 panic("cia_dma_get_tag: shouldn't be here, really..."); 384 } 385} 386 387/* 388 * Create a CIA direct-mapped DMA map. 389 */ 390static int 391cia_bus_dmamap_create_direct( 392 bus_dma_tag_t t, 393 bus_size_t size, 394 int nsegments, 395 bus_size_t maxsegsz, 396 bus_size_t boundary, 397 int flags, 398 bus_dmamap_t *dmamp) 399{ 400 struct cia_config *ccp = t->_cookie; 401 bus_dmamap_t map; 402 int error; 403 404 error = _bus_dmamap_create(t, size, nsegments, maxsegsz, 405 boundary, flags, dmamp); 406 if (error) 407 return (error); 408 409 map = *dmamp; 410 411 if ((ccp->cc_flags & CCF_PYXISBUG) != 0 && 412 map->_dm_segcnt > 1) { 413 /* 414 * We have a Pyxis with the DMA page crossing bug, make 415 * sure we don't coalesce adjacent DMA segments. 416 * 417 * NOTE: We can only do this if the max segment count 418 * is greater than 1. This is because many network 419 * drivers allocate large contiguous blocks of memory 420 * for control data structures, even though they won't 421 * do any single DMA that crosses a page boundary. 422 * -- thorpej@NetBSD.org, 2/5/2000 423 */ 424 map->_dm_flags |= DMAMAP_NO_COALESCE; 425 } 426 427 return (0); 428} 429 430/* 431 * Load a CIA SGMAP-mapped DMA map with a linear buffer. 432 */ 433static int 434cia_bus_dmamap_load_sgmap(bus_dma_tag_t t, bus_dmamap_t map, void *buf, 435 bus_size_t buflen, struct proc *p, int flags) 436{ 437 int error; 438 439 error = pci_sgmap_pte64_load(t, map, buf, buflen, p, flags, 440 t->_sgmap); 441 if (error == 0) 442 CIA_TLB_INVALIDATE(); 443 444 return (error); 445} 446 447/* 448 * Load a CIA SGMAP-mapped DMA map with an mbuf chain. 449 */ 450static int 451cia_bus_dmamap_load_mbuf_sgmap(bus_dma_tag_t t, bus_dmamap_t map, 452 struct mbuf *m, int flags) 453{ 454 int error; 455 456 error = pci_sgmap_pte64_load_mbuf(t, map, m, flags, t->_sgmap); 457 if (error == 0) 458 CIA_TLB_INVALIDATE(); 459 460 return (error); 461} 462 463/* 464 * Load a CIA SGMAP-mapped DMA map with a uio. 465 */ 466static int 467cia_bus_dmamap_load_uio_sgmap(bus_dma_tag_t t, bus_dmamap_t map, 468 struct uio *uio, int flags) 469{ 470 int error; 471 472 error = pci_sgmap_pte64_load_uio(t, map, uio, flags, t->_sgmap); 473 if (error == 0) 474 CIA_TLB_INVALIDATE(); 475 476 return (error); 477} 478 479/* 480 * Load a CIA SGMAP-mapped DMA map with raw memory. 481 */ 482static int 483cia_bus_dmamap_load_raw_sgmap(bus_dma_tag_t t, bus_dmamap_t map, 484 bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags) 485{ 486 int error; 487 488 error = pci_sgmap_pte64_load_raw(t, map, segs, nsegs, size, flags, 489 t->_sgmap); 490 if (error == 0) 491 CIA_TLB_INVALIDATE(); 492 493 return (error); 494} 495 496/* 497 * Unload a CIA DMA map. 498 */ 499static void 500cia_bus_dmamap_unload_sgmap(bus_dma_tag_t t, bus_dmamap_t map) 501{ 502 503 /* 504 * Invalidate any SGMAP page table entries used by this 505 * mapping. 506 */ 507 pci_sgmap_pte64_unload(t, map, t->_sgmap); 508 CIA_TLB_INVALIDATE(); 509 510 /* 511 * Do the generic bits of the unload. 512 */ 513 _bus_dmamap_unload_common(t, map); 514} 515 516/* 517 * Flush the CIA scatter/gather TLB. 518 */ 519static void 520cia_tlb_invalidate(void) 521{ 522 523 alpha_mb(); 524 REGVAL(CIA_PCI_TBIA) = CIA_PCI_TBIA_ALL; 525 alpha_mb(); 526} 527 528/* 529 * Flush the scatter/gather TLB on broken Pyxis chips. 530 */ 531static void 532cia_broken_pyxis_tlb_invalidate(void) 533{ 534 uint32_t ctrl; 535 int i, s; 536 537 s = splhigh(); 538 539 /* 540 * Put the Pyxis into PCI loopback mode. 541 */ 542 alpha_mb(); 543 ctrl = REGVAL(CIA_CSR_CTRL); 544 REGVAL(CIA_CSR_CTRL) = ctrl | CTRL_PCI_LOOP_EN; 545 alpha_mb(); 546 547 /* 548 * Now, read from PCI dense memory space at offset 128M (our 549 * target window base), skipping 64k on each read. This forces 550 * S/G TLB misses. 551 * 552 * XXX Looks like the TLB entries are `not quite LRU'. We need 553 * XXX to read more times than there are actual tags! 554 */ 555 for (i = 0; i < CIA_TLB_NTAGS + 4; i++) { 556 volatile uint64_t dummy; 557 dummy = *((volatile uint64_t *) 558 ALPHA_PHYS_TO_K0SEG(CIA_PCI_DENSE + CIA_PYXIS_BUG_BASE + 559 (i * 65536))); 560 __USE(dummy); 561 } 562 563 /* 564 * Restore normal PCI operation. 565 */ 566 alpha_mb(); 567 REGVAL(CIA_CSR_CTRL) = ctrl; 568 alpha_mb(); 569 570 splx(s); 571} 572