/*
** IA64 System Bus Adapter (SBA) I/O MMU manager
**
** (c) Copyright 2002-2005 Alex Williamson
** (c) Copyright 2002-2003 Grant Grundler
** (c) Copyright 2002-2005 Hewlett-Packard Company
**
** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
**
** This module initializes the IOC (I/O Controller) found on HP
** McKinley machines and their successors.
**
*/

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/bitops.h>		/* hweight64() */

#include <asm/delay.h>		/* ia64_get_itc() */
#include <asm/io.h>
#include <asm/page.h>		/* PAGE_OFFSET */
#include <asm/dma.h>
#include <asm/system.h>		/* wmb() */

#include <asm/acpi-ext.h>

#define PFX "IOC: "

/*
** Enabling timing search of the pdir resource map.  Output in /proc.
** Disabled by default to optimize performance.
*/
#undef PDIR_SEARCH_TIMING

/*
** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
** not defined, all DMA will be 32bit and go through the TLB.
** There's potentially a conflict in the bio merge code with us
** advertising an iommu, but then bypassing it.  Since I/O MMU bypassing
** appears to give more performance than bio-level virtual merging, we'll
** do the former for now.  NOTE: BYPASS_SG also needs to be undef'd to
** completely restrict DMA to the IOMMU.
*/
#define ALLOW_IOV_BYPASS

/*
** This option specifically allows/disallows bypassing scatterlists with
** multiple entries.  Coalescing these entries can allow better DMA streaming
** and in some cases shows better performance than entirely bypassing the
** IOMMU.  Performance increase on the order of 1-2% sequential output/input
** using bonnie++ on a RAID0 MD device (sym2 & mpt).
*/
#undef ALLOW_IOV_BYPASS_SG

/*
** If a device prefetches beyond the end of a valid pdir entry, it will cause
** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
** disconnect on 4k boundaries and prevent such issues.  If the device is
** particularly aggressive, this option will keep the entire pdir valid such
** that prefetching will hit a valid address.  This could severely impact
** error containment, and is therefore off by default.  The page that is
** used for spill-over is poisoned, so that should help debugging somewhat.
*/
#undef FULL_VALID_PDIR

#define ENABLE_MARK_CLEAN

/*
** The number of debug flags is a clue - this code is fragile.  NOTE: since
** tightening the use of res_lock the resource bitmap and actual pdir are no
** longer guaranteed to stay in sync.  The sanity checking code isn't going to
** like that.
*/
#undef DEBUG_SBA_INIT
#undef DEBUG_SBA_RUN
#undef DEBUG_SBA_RUN_SG
#undef DEBUG_SBA_RESOURCE
#undef ASSERT_PDIR_SANITY
#undef DEBUG_LARGE_SG_ENTRIES
#undef DEBUG_BYPASS

#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
#endif

#define SBA_INLINE	__inline__
/* #define SBA_INLINE */

#ifdef DEBUG_SBA_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif

#ifdef DEBUG_SBA_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_SBA_RUN_SG
#define DBG_RUN_SG(x...)	printk(x)
#else
#define DBG_RUN_SG(x...)
#endif


#ifdef DEBUG_SBA_RESOURCE
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

#ifdef DEBUG_BYPASS
#define DBG_BYPASS(x...)	printk(x)
#else
#define DBG_BYPASS(x...)
#endif

#ifdef ASSERT_PDIR_SANITY
#define ASSERT(expr) \
	if(!(expr)) { \
		printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
		panic(#expr); \
	}
#else
#define ASSERT(expr)
#endif

/*
** The number of pdir entries to "free" before issuing
** a read to PCOM register to flush out PCOM writes.
** Interacts with allocation granularity (ie 4 or 8 entries
** allocated and free'd/purged at a time might make this
** less interesting).
*/
#define DELAYED_RESOURCE_CNT	64

#define PCI_DEVICE_ID_HP_SX2000_IOC	0x12ec

#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
#define ZX2_IOC_ID	((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX1000_IOC_ID	((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX2000_IOC_ID	((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)

#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */

#define IOC_FUNC_ID	0x000
#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
#define IOC_IBASE	0x300	/* IO TLB */
#define IOC_IMASK	0x308
#define IOC_PCOM	0x310
#define IOC_TCNFG	0x318
#define IOC_PDIR_BASE	0x320

#define IOC_ROPE0_CFG	0x500
#define IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */


/* AGP GART driver looks for this */
#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL

/*
** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
**
** Some IOCs (sx1000) can run at the above page sizes, but are
** really only supported using the IOC at a 4k page size.
**
** iovp_size could only be greater than PAGE_SIZE if we are
** confident the drivers really only touch the next physical
** page iff that driver instance owns it.
*/
static unsigned long iovp_size;
static unsigned long iovp_shift;
static unsigned long iovp_mask;

struct ioc {
	void __iomem	*ioc_hpa;	/* I/O MMU base address */
	char		*res_map;	/* resource map, bit == pdir entry */
	u64		*pdir_base;	/* physical base address */
	unsigned long	ibase;		/* pdir IOV Space base */
	unsigned long	imask;		/* pdir IOV Space mask */

	unsigned long	*res_hint;	/* next avail IOVP - circular search */
	unsigned long	dma_mask;
	spinlock_t	res_lock;	/* protects the resource bitmap, but must be held when */
					/* clearing pdir to prevent races with allocations. */
	unsigned int	res_bitshift;	/* from the RIGHT! */
	unsigned int	res_size;	/* size of resource map in bytes */
#ifdef CONFIG_NUMA
	unsigned int	node;		/* node where this IOC lives */
#endif
#if DELAYED_RESOURCE_CNT > 0
	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
					/* than res_lock for bigger systems. */
	int		saved_cnt;
	struct sba_dma_pair {
		dma_addr_t	iova;
		size_t		size;
	} saved[DELAYED_RESOURCE_CNT];
#endif

#ifdef PDIR_SEARCH_TIMING
#define SBA_SEARCH_SAMPLE	0x100
	unsigned long avg_search[SBA_SEARCH_SAMPLE];
	unsigned long avg_idx;		/* current index into avg_search */
#endif

	/* Stuff we don't need in performance path */
	struct ioc	*next;		/* list of IOC's in system */
	acpi_handle	handle;		/* for multiple IOC's */
	const char	*name;
	unsigned int	func_id;
	unsigned int	rev;		/* HW revision of chip */
	u32		iov_size;
	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
	struct pci_dev	*sac_only_dev;
};

static struct ioc *ioc_list;
static int reserve_sba_gart = 1;

static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);

#define sba_sg_address(sg)	(page_address((sg)->page) + (sg)->offset)

#ifdef FULL_VALID_PDIR
static u64 prefetch_spill_page;
#endif

#ifdef CONFIG_PCI
# define GET_IOC(dev)	(((dev)->bus == &pci_bus_type) \
			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
#else
# define GET_IOC(dev)	NULL
#endif

/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMAs into manageable chunks.
** On parisc, this is more of the software/tuning constraint
** rather than the HW. I/O MMU allocation algorithms can be
** faster with smaller sizes (to some degree).
*/
#define DMA_CHUNK_SIZE  (BITS_PER_LONG*iovp_size)

#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))
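/*
** Worked example with assumed values (illustrative only): with
** BITS_PER_LONG == 64 and a 4KB IOMMU page (iovp_size == 0x1000),
** DMA_CHUNK_SIZE works out to 64 * 4096 == 256KB, i.e. one resource-map
** word's worth of pdir entries.  ROUNDUP() rounds x up to a multiple of
** the power-of-two y, e.g. ROUNDUP(0x1234, 0x1000) == 0x2000.
*/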

/************************************
** SBA register read and write support
**
** BE WARNED: register writes are posted.
**  (ie follow writes which must reach HW with a read)
**
*/
#define READ_REG(addr)       __raw_readq(addr)
#define WRITE_REG(val, addr) __raw_writeq(val, addr)

#ifdef DEBUG_SBA_INIT

/**
 * sba_dump_tlb - debugging only - print IOMMU operating parameters
 * @hpa: base address of the IOMMU
 *
 * Print the size/location of the IO MMU PDIR.
 */
static void
sba_dump_tlb(char *hpa)
{
	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
	DBG_INIT("\n");
}
#endif


#ifdef ASSERT_PDIR_SANITY

/**
 * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 * @pide: pdir index.
 *
 * Print one entry of the IO MMU PDIR in human readable form.
 */
static void
sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
{
	/* start printing from lowest pde in rval */
	u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
	unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
	uint rcnt;

	printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
		 msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);

	rcnt = 0;
	while (rcnt < BITS_PER_LONG) {
		printk(KERN_DEBUG "%s %2d %p %016Lx\n",
		       (rcnt == (pide & (BITS_PER_LONG - 1)))
		       ? "    -->" : "       ",
		       rcnt, ptr, (unsigned long long) *ptr );
		rcnt++;
		ptr++;
	}
	printk(KERN_DEBUG "%s", msg);
}


/**
 * sba_check_pdir - debugging only - consistency checker
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 *
 * Verify the resource map and pdir state is consistent
 */
static int
sba_check_pdir(struct ioc *ioc, char *msg)
{
	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
	u64 *pptr = ioc->pdir_base;	/* pdir ptr */
	uint pide = 0;

	while (rptr < rptr_end) {
		u64 rval;
		int rcnt; /* number of bits we might check */

		rval = *rptr;
		rcnt = 64;

		while (rcnt) {
			/* Get last byte and highest bit from that */
			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
			if ((rval & 0x1) ^ pde)
			{
				/*
				** BUMMER!  -- res_map != pdir --
				** Dump rval and matching pdir entries
				*/
				sba_dump_pdir_entry(ioc, msg, pide);
				return(1);
			}
			rcnt--;
			rval >>= 1;	/* try the next bit */
			pptr++;
			pide++;
		}
		rptr++;	/* look at next word of res_map */
	}
	/* It'd be nice if we always got here :^) */
	return 0;
}


/**
 * sba_dump_sg - debugging only - print Scatter-Gather list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg: head of the SG list
 * @nents: number of entries in SG list
 *
 * print the SG list so we can verify it's correct by hand.
 */
static void
sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	while (nents-- > 0) {
		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
		       startsg->dma_address, startsg->dma_length,
		       sba_sg_address(startsg));
		startsg++;
	}
}

static void
sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	struct scatterlist *the_sg = startsg;
	int the_nents = nents;

	while (the_nents-- > 0) {
		if (sba_sg_address(the_sg) == 0x0UL)
			sba_dump_sg(NULL, startsg, nents);
		the_sg++;
	}
}

#endif /* ASSERT_PDIR_SANITY */




/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
***************************************************************/
#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */

/* Convert from IOVP to IOVA and vice versa. */
#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset))
#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase))

#define PDIR_ENTRY_SIZE	sizeof(u64)

#define PDIR_INDEX(iovp)   ((iovp)>>iovp_shift)

#define RESMAP_MASK(n)    ~(~0UL << (n))
#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)
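/*
** Worked example with assumed values (illustrative only): suppose
** iovp_shift == 12 and firmware programmed ibase == 0x40000000.  For pdir
** index 5 and a buffer offset of 0x234 within the page:
**
**	iovp = 5 << iovp_shift               == 0x5000
**	SBA_IOVA(ioc, iovp, 0x234)           == 0x40005234
**	SBA_IOVP(ioc, 0x40005234)            == 0x5234
**	PDIR_INDEX(0x5234)                   == 5
**
** RESMAP_MASK(n) builds a mask of n set bits, e.g. RESMAP_MASK(3) == 0x7,
** and RESMAP_IDX_MASK aligns a resource-map byte index down to an
** unsigned long boundary.  Likewise, get_iovp_order() below returns the
** power-of-two IOMMU page count for a size: with iovp_shift == 12,
** get_iovp_order(0x1000) == 0 and get_iovp_order(0x4000) == 2
** (i.e. four 4KB IOMMU pages).
*/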


/**
 * For most cases the normal get_order is sufficient, however it limits us
 * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
 * It only incurs about 1 clock cycle to use this one with the static variable
 * and makes the code more intuitive.
 */
static SBA_INLINE int
get_iovp_order (unsigned long size)
{
	long double d = size - 1;
	long order;

	order = ia64_getf_exp(d);
	order = order - iovp_shift - 0xffff + 1;
	if (order < 0)
		order = 0;
	return order;
}

/**
 * sba_search_bitmap - find free space in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @bits_wanted: number of entries we need.
 * @use_hint: use res_hint to indicate where to start looking
 *
 * Find consecutive free bits in resource bitmap.
 * Each bit represents one entry in the IO Pdir.
 * Cool perf optimization: search for log2(size) bits at a time.
 */
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
{
	unsigned long *res_ptr;
	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
	unsigned long flags, pide = ~0UL;

	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
	ASSERT(res_ptr < res_end);

	spin_lock_irqsave(&ioc->res_lock, flags);

	/* Allow caller to force a search through the entire resource space */
	if (likely(use_hint)) {
		res_ptr = ioc->res_hint;
	} else {
		res_ptr = (ulong *)ioc->res_map;
		ioc->res_bitshift = 0;
	}

	/*
	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
	 * if a TLB entry is purged while in use.  sba_mark_invalid()
	 * purges IOTLB entries in power-of-two sizes, so we also
	 * allocate IOVA space in power-of-two sizes.
	 */
	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);

	if (likely(bits_wanted == 1)) {
		unsigned int bitshiftcnt;
		for(; res_ptr < res_end ; res_ptr++) {
			if (likely(*res_ptr != ~0UL)) {
				bitshiftcnt = ffz(*res_ptr);
				*res_ptr |= (1UL << bitshiftcnt);
				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
				pide <<= 3;	/* convert to bit address */
				pide += bitshiftcnt;
				ioc->res_bitshift = bitshiftcnt + bits_wanted;
				goto found_it;
			}
		}
		goto not_found;

	}

	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
		/*
		** Search the resource bit map on well-aligned values.
		** "o" is the alignment.
		** We need the alignment to invalidate I/O TLB using
		** SBA HW features in the unmap path.
		*/
		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
		unsigned long mask, base_mask;

		base_mask = RESMAP_MASK(bits_wanted);
		mask = base_mask << bitshiftcnt;

		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
		for(; res_ptr < res_end ; res_ptr++)
		{
			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
			ASSERT(0 != mask);
			for (; mask ; mask <<= o, bitshiftcnt += o) {
				if(0 == ((*res_ptr) & mask)) {
					*res_ptr |= mask;     /* mark resources busy! */
					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
					pide <<= 3;	/* convert to bit address */
					pide += bitshiftcnt;
					ioc->res_bitshift = bitshiftcnt + bits_wanted;
					goto found_it;
				}
			}

			bitshiftcnt = 0;
			mask = base_mask;

		}

	} else {
		int qwords, bits, i;
		unsigned long *end;

		qwords = bits_wanted >> 6; /* /64 */
		bits = bits_wanted - (qwords * BITS_PER_LONG);

		end = res_end - qwords;

		for (; res_ptr < end; res_ptr++) {
			for (i = 0 ; i < qwords ; i++) {
				if (res_ptr[i] != 0)
					goto next_ptr;
			}
			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
				continue;

			/* Found it, mark it */
			for (i = 0 ; i < qwords ; i++)
				res_ptr[i] = ~0UL;
			res_ptr[i] |= RESMAP_MASK(bits);

			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
			pide <<= 3;	/* convert to bit address */
			res_ptr += qwords;
			ioc->res_bitshift = bits;
			goto found_it;
next_ptr:
			;
		}
	}

not_found:
	prefetch(ioc->res_map);
	ioc->res_hint = (unsigned long *) ioc->res_map;
	ioc->res_bitshift = 0;
	spin_unlock_irqrestore(&ioc->res_lock, flags);
	return (pide);

found_it:
	ioc->res_hint = res_ptr;
	spin_unlock_irqrestore(&ioc->res_lock, flags);
	return (pide);
}


/**
 * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @size: number of bytes to create a mapping for
 *
 * Given a size, find consecutive unmarked bits in the resource bitmap
 * and then mark them.
 */
static int
sba_alloc_range(struct ioc *ioc, size_t size)
{
	unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
	unsigned long itc_start;
#endif
	unsigned long pide;

	ASSERT(pages_needed);
	ASSERT(0 == (size & ~iovp_mask));

#ifdef PDIR_SEARCH_TIMING
	itc_start = ia64_get_itc();
#endif
	/*
	** "seek and ye shall find"...praying never hurts either...
	*/
	pide = sba_search_bitmap(ioc, pages_needed, 1);
	if (unlikely(pide >= (ioc->res_size << 3))) {
		pide = sba_search_bitmap(ioc, pages_needed, 0);
		if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
			unsigned long flags;

			/*
			** With delayed resource freeing, we can give this one more shot.  We're
			** getting close to being in trouble here, so do what we can to make this
			** one count.
			*/
			spin_lock_irqsave(&ioc->saved_lock, flags);
			if (ioc->saved_cnt > 0) {
				struct sba_dma_pair *d;
				int cnt = ioc->saved_cnt;

				d = &(ioc->saved[ioc->saved_cnt - 1]);

				spin_lock(&ioc->res_lock);
				while (cnt--) {
					sba_mark_invalid(ioc, d->iova, d->size);
					sba_free_range(ioc, d->iova, d->size);
					d--;
				}
				ioc->saved_cnt = 0;
				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
				spin_unlock(&ioc->res_lock);
			}
			spin_unlock_irqrestore(&ioc->saved_lock, flags);

			pide = sba_search_bitmap(ioc, pages_needed, 0);
			if (unlikely(pide >= (ioc->res_size << 3)))
				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
				      ioc->ioc_hpa);
#else
			panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
			      ioc->ioc_hpa);
#endif
		}
	}

#ifdef PDIR_SEARCH_TIMING
	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
#endif

	prefetchw(&(ioc->pdir_base[pide]));

#ifdef ASSERT_PDIR_SANITY
	/* verify the first enable bit is clear */
	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
	}
#endif

	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
		__FUNCTION__, size, pages_needed, pide,
		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
		ioc->res_bitshift );

	return (pide);
}


/**
 * sba_free_range - unmark bits in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @iova: IO virtual address which was previously allocated.
 * @size: number of bytes to create a mapping for
 *
 * clear bits in the ioc's resource map
 */
static SBA_INLINE void
sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	unsigned long iovp = SBA_IOVP(ioc, iova);
	unsigned int pide = PDIR_INDEX(iovp);
	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
	int bits_not_wanted = size >> iovp_shift;
	unsigned long m;

	/* Round up to power-of-two size: see AR2305 note above */
	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
	for (; bits_not_wanted > 0 ; res_ptr++) {

		if (unlikely(bits_not_wanted > BITS_PER_LONG)) {

			/* these mappings start 64bit aligned */
			*res_ptr = 0UL;
			bits_not_wanted -= BITS_PER_LONG;
			pide += BITS_PER_LONG;

		} else {

			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
			bits_not_wanted = 0;

			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
			        bits_not_wanted, m, pide, res_ptr, *res_ptr);

			ASSERT(m != 0);
			ASSERT(bits_not_wanted);
			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
			*res_ptr &= ~m;
		}
	}
}


/**************************************************************
*
* "Dynamic DMA Mapping" support (aka "Coherent I/O")
*
***************************************************************/

/**
 * sba_io_pdir_entry - fill in one IO PDIR entry
 * @pdir_ptr:  pointer to IO PDIR entry
 * @vba: Virtual CPU address of buffer to map
 *
 * SBA Mapping Routine
 *
 * Given a virtual address (vba, arg1) sba_io_pdir_entry()
 * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
 * Each IO Pdir entry consists of 8 bytes as shown below
 * (LSB == bit 0):
 *
 *  63                    40                                 11    7        0
 * +-+---------------------+----------------------------------+----+--------+
 * |V|        U            |            PPN[39:12]            | U  |   FF   |
 * +-+---------------------+----------------------------------+----+--------+
 *
 *  V  == Valid Bit
 *  U  == Unused
 * PPN == Physical Page Number
 *
 * The physical address fields are filled with the results of virt_to_phys()
 * on the vba.
 */

#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL)	\
						      | 0x8000000000000000ULL)

#ifdef ENABLE_MARK_CLEAN
/**
 * Since DMA is i-cache coherent, any (complete) pages that were written via
 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
 * flush them when they get mapped into an executable vm-area.
 */
static void
mark_clean (void *addr, size_t size)
{
	unsigned long pg_addr, end;

	pg_addr = PAGE_ALIGN((unsigned long) addr);
	end = (unsigned long) addr + size;
	while (pg_addr + PAGE_SIZE <= end) {
		struct page *page = virt_to_page((void *)pg_addr);
		set_bit(PG_arch_1, &page->flags);
		pg_addr += PAGE_SIZE;
	}
}
#endif

/**
 * sba_mark_invalid - invalidate one or more IO PDIR entries
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @iova:  IO Virtual Address mapped earlier
 * @byte_cnt:  number of bytes this mapping covers.
 *
 * Marking the IO PDIR entry(ies) as Invalid and invalidate
 * corresponding IO TLB entry. The PCOM (Purge Command Register)
 * is to purge stale entries in the IO TLB when unmapping entries.
 *
 * The PCOM register supports purging of multiple pages, with a minimum
 * of 1 page and a maximum of 2GB. Hardware requires the address be
 * aligned to the size of the range being purged. The size of the range
 * must be a power of 2. The "Cool perf optimization" in the
 * allocation routine helps keep that true.
 */
static SBA_INLINE void
sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
{
	u32 iovp = (u32) SBA_IOVP(ioc,iova);

	int off = PDIR_INDEX(iovp);

	/* Must be non-zero and rounded up */
	ASSERT(byte_cnt > 0);
	ASSERT(0 == (byte_cnt & ~iovp_mask));

#ifdef ASSERT_PDIR_SANITY
	/* Assert first pdir entry is set */
	if (!(ioc->pdir_base[off] >> 60)) {
		sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
	}
#endif

	if (byte_cnt <= iovp_size)
	{
		ASSERT(off < ioc->pdir_size);

		iovp |= iovp_shift;     /* set "size" field for PCOM */

#ifndef FULL_VALID_PDIR
		/*
		** clear I/O PDIR entry "valid" bit
		** Do NOT clear the rest - save it for debugging.
		** We should only clear bits that have previously
		** been enabled.
		*/
		ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
#else
		/*
		** If we want to maintain the PDIR as valid, put in
		** the spill page so devices prefetching won't
		** cause a hard fail.
		*/
		ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
#endif
	} else {
		u32 t = get_iovp_order(byte_cnt) + iovp_shift;

		iovp |= t;
		ASSERT(t <= 31);	/* 2GB! Max value of "size" field */

		do {
			/* verify this pdir entry is enabled */
			ASSERT(ioc->pdir_base[off] >> 63);
#ifndef FULL_VALID_PDIR
			/* clear I/O Pdir entry "valid" bit first */
			ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
#else
			ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
#endif
			off++;
			byte_cnt -= iovp_size;
		} while (byte_cnt > 0);
	}

	WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
}
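/*
** Worked example with assumed values (illustrative only), iovp_shift == 12:
**
** sba_io_pdir_entry(): with the usual ia64 region-7 identity mapping, a
** kernel virtual address such as 0xe000000004ab1234 yields the pdir entry
**
**	(0xe000000004ab1234 & ~0xE000000000000FFF) | 0x8000000000000000
**		== 0x8000000004ab1000
**
** i.e. the valid bit (bit 63) plus the physical page frame, with the low
** 12 offset bits cleared.
**
** sba_mark_invalid(): the low bits of the value written to IOC_PCOM encode
** log2 of the purge size.  Unmapping 16KB at IOVP 0x5000 gives
**
**	t    = get_iovp_order(16384) + iovp_shift == 2 + 12 == 14
**	PCOM = ioc->ibase | 0x5000 | 14
**
** which asks the hardware to purge a 2^14 == 16KB aligned range.
*/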

/**
 * sba_map_single - map one buffer and return IOVA for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @addr:  driver buffer to map.
 * @size:  number of bytes to map in driver buffer.
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
dma_addr_t
sba_map_single(struct device *dev, void *addr, size_t size, int dir)
{
	struct ioc *ioc;
	dma_addr_t iovp;
	dma_addr_t offset;
	u64 *pdir_start;
	int pide;
#ifdef ASSERT_PDIR_SANITY
	unsigned long flags;
#endif
#ifdef ALLOW_IOV_BYPASS
	unsigned long pci_addr = virt_to_phys(addr);
#endif

#ifdef ALLOW_IOV_BYPASS
	ASSERT(to_pci_dev(dev)->dma_mask);
	/*
 	** Check if the PCI device can DMA to ptr... if so, just return ptr
 	*/
	if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) {
		/*
 		** Device is capable of DMA'ing to the buffer...
		** just return the PCI address of ptr
 		*/
		DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
		           to_pci_dev(dev)->dma_mask, pci_addr);
		return pci_addr;
	}
#endif
	ioc = GET_IOC(dev);
	ASSERT(ioc);

	prefetch(ioc->res_hint);

	ASSERT(size > 0);
	ASSERT(size <= DMA_CHUNK_SIZE);

	/* save offset bits */
	offset = ((dma_addr_t) (long) addr) & ~iovp_mask;

	/* round up to nearest iovp_size */
	size = (size + offset + ~iovp_mask) & iovp_mask;

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check before sba_map_single()"))
		panic("Sanity check failed");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	pide = sba_alloc_range(ioc, size);

	iovp = (dma_addr_t) pide << iovp_shift;

	DBG_RUN("%s() 0x%p -> 0x%lx\n",
		__FUNCTION__, addr, (long) iovp | offset);

	pdir_start = &(ioc->pdir_base[pide]);

	while (size > 0) {
		ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
		sba_io_pdir_entry(pdir_start, (unsigned long) addr);

		DBG_RUN("     pdir 0x%p %lx\n", pdir_start, *pdir_start);

		addr += iovp_size;
		size -= iovp_size;
		pdir_start++;
	}
	/* force pdir update */
	wmb();

	/* form complete address */
#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check after sba_map_single()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
	return SBA_IOVA(ioc, iovp, offset);
}

#ifdef ENABLE_MARK_CLEAN
static SBA_INLINE void
sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	u32	iovp = (u32) SBA_IOVP(ioc,iova);
	int	off = PDIR_INDEX(iovp);
	void	*addr;

	if (size <= iovp_size) {
		addr = phys_to_virt(ioc->pdir_base[off] &
		                    ~0xE000000000000FFFULL);
		mark_clean(addr, size);
	} else {
		do {
			addr = phys_to_virt(ioc->pdir_base[off] &
			                    ~0xE000000000000FFFULL);
			mark_clean(addr, min(size, iovp_size));
			off++;
			size -= iovp_size;
		} while (size > 0);
	}
}
#endif
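/*
** Worked example with assumed values (illustrative only): the bypass test
** in sba_unmap_single() below checks whether an address falls inside the
** IOVA window.  Assuming a 1GB window with ibase == 0x40000000 and
** imask == 0xFFFFFFFFC0000000:
**
**	0x40005234 & imask == 0x40000000 == ibase   -> IOMMU mapping
**	0x00100000 & imask == 0x00000000 != ibase   -> bypassed address
*/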

/**
 * sba_unmap_single - unmap one IOVA and free resources
 * @dev: instance of PCI owned by the driver that's asking.
 * @iova:  IOVA of driver buffer previously mapped.
 * @size:  number of bytes mapped in driver buffer.
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
{
	struct ioc *ioc;
#if DELAYED_RESOURCE_CNT > 0
	struct sba_dma_pair *d;
#endif
	unsigned long flags;
	dma_addr_t offset;

	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef ALLOW_IOV_BYPASS
	if (likely((iova & ioc->imask) != ioc->ibase)) {
		/*
		** Address does not fall w/in IOVA, must be bypassing
		*/
		DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);

#ifdef ENABLE_MARK_CLEAN
		if (dir == DMA_FROM_DEVICE) {
			mark_clean(phys_to_virt(iova), size);
		}
#endif
		return;
	}
#endif
	offset = iova & ~iovp_mask;

	DBG_RUN("%s() iovp 0x%lx/%x\n",
		__FUNCTION__, (long) iova, size);

	iova ^= offset;        /* clear offset bits */
	size += offset;
	size = ROUNDUP(size, iovp_size);

#ifdef ENABLE_MARK_CLEAN
	if (dir == DMA_FROM_DEVICE)
		sba_mark_clean(ioc, iova, size);
#endif

#if DELAYED_RESOURCE_CNT > 0
	spin_lock_irqsave(&ioc->saved_lock, flags);
	d = &(ioc->saved[ioc->saved_cnt]);
	d->iova = iova;
	d->size = size;
	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
		int cnt = ioc->saved_cnt;
		spin_lock(&ioc->res_lock);
		while (cnt--) {
			sba_mark_invalid(ioc, d->iova, d->size);
			sba_free_range(ioc, d->iova, d->size);
			d--;
		}
		ioc->saved_cnt = 0;
		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
		spin_unlock(&ioc->res_lock);
	}
	spin_unlock_irqrestore(&ioc->saved_lock, flags);
#else /* DELAYED_RESOURCE_CNT == 0 */
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_mark_invalid(ioc, iova, size);
	sba_free_range(ioc, iova, size);
	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif /* DELAYED_RESOURCE_CNT == 0 */
}


/**
 * sba_alloc_coherent - allocate/map shared mem for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @size:  number of bytes mapped in driver buffer.
 * @dma_handle:  IOVA of new buffer.
 *
 * See Documentation/DMA-mapping.txt
 */
void *
sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
{
	struct ioc *ioc;
	void *addr;

	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef CONFIG_NUMA
	{
		struct page *page;
		page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
		                        numa_node_id() : ioc->node, flags,
		                        get_order(size));

		if (unlikely(!page))
			return NULL;

		addr = page_address(page);
	}
#else
	addr = (void *) __get_free_pages(flags, get_order(size));
#endif
	if (unlikely(!addr))
		return NULL;

	memset(addr, 0, size);
	*dma_handle = virt_to_phys(addr);

#ifdef ALLOW_IOV_BYPASS
	ASSERT(dev->coherent_dma_mask);
	/*
 	** Check if the PCI device can DMA to ptr... if so, just return ptr
 	*/
	if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
		DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
		           dev->coherent_dma_mask, *dma_handle);

		return addr;
	}
#endif

	/*
	 * If device can't bypass or bypass is disabled, pass the 32bit fake
	 * device to map single to get an iova mapping.
	 */
	*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);

	return addr;
}


/**
 * sba_free_coherent - free/unmap shared mem for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @size:  number of bytes mapped in driver buffer.
 * @vaddr:  virtual address IOVA of "consistent" buffer.
 * @dma_handle:  IO virtual address of "consistent" buffer.
 *
 * See Documentation/DMA-mapping.txt
 */
void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
{
	sba_unmap_single(dev, dma_handle, size, 0);
	free_pages((unsigned long) vaddr, get_order(size));
}


/*
** Since 0 is a valid pdir_base index value, can't use that
** to determine if a value is valid or not. Use a flag to indicate
** the SG list entry contains a valid pdir index.
*/
#define PIDE_FLAG 0x1UL

#ifdef DEBUG_LARGE_SG_ENTRIES
int dump_run_sg = 0;
#endif


/**
 * sba_fill_pdir - write allocated SG entries into IO PDIR
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg:  list of IOVA/size pairs
 * @nents: number of entries in startsg list
 *
 * Take preprocessed SG list and write corresponding entries
 * in the IO PDIR.
 */

static SBA_INLINE int
sba_fill_pdir(
	struct ioc *ioc,
	struct scatterlist *startsg,
	int nents)
{
	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
	int n_mappings = 0;
	u64 *pdirp = NULL;
	unsigned long dma_offset = 0;

	dma_sg--;
	while (nents-- > 0) {
		int     cnt = startsg->dma_length;
		startsg->dma_length = 0;

#ifdef DEBUG_LARGE_SG_ENTRIES
		if (dump_run_sg)
			printk(" %2d : %08lx/%05x %p\n",
				nents, startsg->dma_address, cnt,
				sba_sg_address(startsg));
#else
		DBG_RUN_SG(" %d : %08lx/%05x %p\n",
				nents, startsg->dma_address, cnt,
				sba_sg_address(startsg));
#endif
		/*
		** Look for the start of a new DMA stream
		*/
		if (startsg->dma_address & PIDE_FLAG) {
			u32 pide = startsg->dma_address & ~PIDE_FLAG;
			dma_offset = (unsigned long) pide & ~iovp_mask;
			startsg->dma_address = 0;
			dma_sg++;
			dma_sg->dma_address = pide | ioc->ibase;
			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
			n_mappings++;
		}

		/*
		** Look for a VCONTIG chunk
		*/
		if (cnt) {
			unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
			ASSERT(pdirp);

			/* Since multiple Vcontig blocks could make up
			** one DMA stream, *add* cnt to dma_len.
			*/
			dma_sg->dma_length += cnt;
			cnt += dma_offset;
			dma_offset=0;	/* only want offset on first chunk */
			cnt = ROUNDUP(cnt, iovp_size);
			do {
				sba_io_pdir_entry(pdirp, vaddr);
				vaddr += iovp_size;
				cnt -= iovp_size;
				pdirp++;
			} while (cnt > 0);
		}
		startsg++;
	}
	/* force pdir update */
	wmb();

#ifdef DEBUG_LARGE_SG_ENTRIES
	dump_run_sg = 0;
#endif
	return(n_mappings);
}


/*
** Two address ranges are DMA contiguous *iff* "end of prev" and
** "start of next" are both on an IOV page boundary.
**
** (shift left is a quick trick to mask off upper bits)
*/
#define DMA_CONTIG(__X, __Y) \
	(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
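/*
** Worked example with assumed values (illustrative only): with
** iovp_shift == 12, DMA_CONTIG() is true only when both addresses have
** zero page-offset bits, e.g.
**
**	DMA_CONTIG(0xe000000000125000, 0xe000000000348000) -> true
**	DMA_CONTIG(0xe000000000125800, 0xe000000000348000) -> false
**
** so two chunks that are not virtually contiguous can still share one
** DMA stream as long as the first ends, and the second starts, on an
** IOV page boundary.
*/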

/**
 * sba_coalesce_chunks - preprocess the SG list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg:  list of IOVA/size pairs
 * @nents: number of entries in startsg list
 *
 * First pass is to walk the SG list and determine where the breaks are
 * in the DMA stream. Allocates PDIR entries but does not fill them.
 * Returns the number of DMA chunks.
 *
 * Doing the fill separate from the coalescing/allocation keeps the
 * code simpler. Future enhancement could make one pass through
 * the sglist do both.
 */
static SBA_INLINE int
sba_coalesce_chunks( struct ioc *ioc,
	struct scatterlist *startsg,
	int nents)
{
	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
	unsigned long vcontig_len;         /* len of VCONTIG chunk */
	unsigned long vcontig_end;
	struct scatterlist *dma_sg;        /* next DMA stream head */
	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
	int n_mappings = 0;

	while (nents > 0) {
		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);

		/*
		** Prepare for first/next DMA stream
		*/
		dma_sg = vcontig_sg = startsg;
		dma_len = vcontig_len = vcontig_end = startsg->length;
		vcontig_end +=  vaddr;
		dma_offset = vaddr & ~iovp_mask;

		/* PARANOID: clear entries */
		startsg->dma_address = startsg->dma_length = 0;

		/*
		** This loop terminates one iteration "early" since
		** it's always looking one "ahead".
		*/
		while (--nents > 0) {
			unsigned long vaddr;	/* tmp */

			startsg++;

			/* PARANOID */
			startsg->dma_address = startsg->dma_length = 0;

			/* catch brokenness in SCSI layer */
			ASSERT(startsg->length <= DMA_CHUNK_SIZE);

			/*
			** First make sure current dma stream won't
			** exceed DMA_CHUNK_SIZE if we coalesce the
			** next entry.
			*/
			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
			    > DMA_CHUNK_SIZE)
				break;

			/*
			** Then look for virtually contiguous blocks.
			**
			** append the next transaction?
			*/
			vaddr = (unsigned long) sba_sg_address(startsg);
			if  (vcontig_end == vaddr)
			{
				vcontig_len += startsg->length;
				vcontig_end += startsg->length;
				dma_len     += startsg->length;
				continue;
			}

#ifdef DEBUG_LARGE_SG_ENTRIES
			dump_run_sg = (vcontig_len > iovp_size);
#endif

			/*
			** Not virtually contiguous.
			** Terminate prev chunk.
			** Start a new chunk.
			**
			** Once we start a new VCONTIG chunk, dma_offset
			** can't change. And we need the offset from the first
			** chunk - not the last one. Ergo successive chunks
			** must start on page boundaries and dovetail
			** with their predecessors.
			*/
			vcontig_sg->dma_length = vcontig_len;

			vcontig_sg = startsg;
			vcontig_len = startsg->length;

			/*
			** 3) do the entries end/start on page boundaries?
			**    Don't update vcontig_end until we've checked.
			*/
			if (DMA_CONTIG(vcontig_end, vaddr))
			{
				vcontig_end = vcontig_len + vaddr;
				dma_len += vcontig_len;
				continue;
			} else {
				break;
			}
		}

		/*
		** End of DMA Stream
		** Terminate last VCONTIG block.
		** Allocate space for DMA stream.
		*/
		vcontig_sg->dma_length = vcontig_len;
		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
		ASSERT(dma_len <= DMA_CHUNK_SIZE);
		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
			| (sba_alloc_range(ioc, dma_len) << iovp_shift)
			| dma_offset);
		n_mappings++;
	}

	return n_mappings;
}


/**
 * sba_map_sg - map Scatter/Gather list
 * @dev: instance of PCI owned by the driver that's asking.
 * @sglist:  array of buffer/length pairs
 * @nents:  number of entries in list
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
{
	struct ioc *ioc;
	int coalesced, filled = 0;
#ifdef ASSERT_PDIR_SANITY
	unsigned long flags;
#endif
#ifdef ALLOW_IOV_BYPASS_SG
	struct scatterlist *sg;
#endif

	DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef ALLOW_IOV_BYPASS_SG
	ASSERT(to_pci_dev(dev)->dma_mask);
	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
		for (sg = sglist ; filled < nents ; filled++, sg++){
			sg->dma_length = sg->length;
			sg->dma_address = virt_to_phys(sba_sg_address(sg));
		}
		return filled;
	}
#endif
	/* Fast path single entry scatterlists. */
	if (nents == 1) {
		sglist->dma_length = sglist->length;
		sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
		return 1;
	}

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
	{
		sba_dump_sg(ioc, sglist, nents);
		panic("Check before sba_map_sg()");
	}
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	prefetch(ioc->res_hint);

	/*
	** First coalesce the chunks and allocate I/O pdir space
	**
	** If this is one DMA stream, we can properly map using the
	** correct virtual address associated with each DMA page.
	** w/o this association, we wouldn't have coherent DMA!
	** Access to the virtual address is what forces a two pass algorithm.
	*/
	coalesced = sba_coalesce_chunks(ioc, sglist, nents);

	/*
	** Program the I/O Pdir
	**
	** map the virtual addresses to the I/O Pdir
	** o dma_address will contain the pdir index
	** o dma_len will contain the number of bytes to map
	** o address contains the virtual address.
	*/
	filled = sba_fill_pdir(ioc, sglist, nents);

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
	{
		sba_dump_sg(ioc, sglist, nents);
		panic("Check after sba_map_sg()\n");
	}
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	ASSERT(coalesced == filled);
	DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);

	return filled;
}


/**
 * sba_unmap_sg - unmap Scatter/Gather list
 * @dev: instance of PCI owned by the driver that's asking.
 * @sglist:  array of buffer/length pairs
 * @nents:  number of entries in list
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
{
#ifdef ASSERT_PDIR_SANITY
	struct ioc *ioc;
	unsigned long flags;
#endif

	DBG_RUN_SG("%s() START %d entries, %p,%x\n",
		   __FUNCTION__, nents, sba_sg_address(sglist), sglist->length);

#ifdef ASSERT_PDIR_SANITY
	ioc = GET_IOC(dev);
	ASSERT(ioc);

	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	while (nents && sglist->dma_length) {

		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
		sglist++;
		nents--;
	}

	DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__,  nents);

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

}

/**************************************************************
*
*   Initialization and claim
*
***************************************************************/

static void __init
ioc_iova_init(struct ioc *ioc)
{
	int tcnfg;
	int agp_found = 0;
	struct pci_dev *device = NULL;
#ifdef FULL_VALID_PDIR
	unsigned long index;
#endif

	/*
	** Firmware programs the base and size of a "safe IOVA space"
	** (one that doesn't overlap memory or LMMIO space) in the
	** IBASE and IMASK registers.
	*/
	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;

	ioc->iov_size = ~ioc->imask + 1;

	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
		__FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
		ioc->iov_size >> 20);

	switch (iovp_size) {
		case  4*1024: tcnfg = 0; break;
		case  8*1024: tcnfg = 1; break;
		case 16*1024: tcnfg = 2; break;
		case 64*1024: tcnfg = 3; break;
		default:
			panic(PFX "Unsupported IOTLB page size %ldK",
				iovp_size >> 10);
			break;
	}
	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);

	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
						   get_order(ioc->pdir_size));
	if (!ioc->pdir_base)
		panic(PFX "Couldn't allocate I/O Page Table\n");

	memset(ioc->pdir_base, 0, ioc->pdir_size);

	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);

	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
	WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);

	/*
	** If an AGP device is present, only use half of the IOV space
	** for PCI DMA.  Unfortunately we can't know ahead of time
	** whether GART support will actually be used, for now we
	** can just key on an AGP device found in the system.
	** We program the next pdir index after we stop w/ a key for
	** the GART code to handshake on.
	*/
	for_each_pci_dev(device)
		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);

	if (agp_found && reserve_sba_gart) {
		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
		      ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
		ioc->pdir_size /= 2;
		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
	}
#ifdef FULL_VALID_PDIR
	/*
	** Check to see if the spill page has been allocated, we don't need more than
	** one across multiple SBAs.
	*/
	if (!prefetch_spill_page) {
		char *spill_poison = "SBAIOMMU POISON";
		int poison_size = 16;
		void *poison_addr, *addr;

		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
		if (!addr)
			panic(PFX "Couldn't allocate PDIR spill page\n");

		poison_addr = addr;
		for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size)
			memcpy(poison_addr, spill_poison, poison_size);

		prefetch_spill_page = virt_to_phys(addr);

		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
	}
	/*
	** Set all the PDIR entries valid w/ the spill page as the target
	*/
	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
#endif

	/* Clear I/O TLB of any possible entries */
	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
	READ_REG(ioc->ioc_hpa + IOC_PCOM);

	/* Enable IOVA translation */
	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
	READ_REG(ioc->ioc_hpa + IOC_IBASE);
}

static void __init
ioc_resource_init(struct ioc *ioc)
{
	spin_lock_init(&ioc->res_lock);
#if DELAYED_RESOURCE_CNT > 0
	spin_lock_init(&ioc->saved_lock);
#endif

	/* resource map size dictated by pdir_size */
	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
	ioc->res_size >>= 3;  /* convert bit count to byte count */
	DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);

	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
						 get_order(ioc->res_size));
	if (!ioc->res_map)
		panic(PFX "Couldn't allocate resource map\n");

	memset(ioc->res_map, 0, ioc->res_size);
	/* next available IOVP - circular search */
	ioc->res_hint = (unsigned long *) ioc->res_map;

#ifdef ASSERT_PDIR_SANITY
	/* Mark first bit busy - ie no IOVA 0 */
	ioc->res_map[0] = 0x1;
	ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
#endif
#ifdef FULL_VALID_PDIR
	/* Mark the last resource used so we don't prefetch beyond IOVA space */
	ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
	ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
							      | prefetch_spill_page);
#endif

	DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
		 ioc->res_size, (void *) ioc->res_map);
}
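/*
** Worked sizing example with assumed values (illustrative only): for a
** 1GB IOVA window and 4KB IOMMU pages, iov_size / iovp_size == 256K pdir
** entries, so
**
**	pdir_size == 256K * 8 bytes == 2MB
**	res_size  == 256K bits / 8  == 32KB resource map
**
** i.e. one resource-map bit per 8-byte pdir entry.
*/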

static void __init
ioc_sac_init(struct ioc *ioc)
{
	struct pci_dev *sac = NULL;
	struct pci_controller *controller = NULL;

	/*
	 * pci_alloc_coherent() must return a DMA address which is
	 * SAC (single address cycle) addressable, so allocate a
	 * pseudo-device to enforce that.
	 */
	sac = kzalloc(sizeof(*sac), GFP_KERNEL);
	if (!sac)
		panic(PFX "Couldn't allocate struct pci_dev");

	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
	if (!controller)
		panic(PFX "Couldn't allocate struct pci_controller");

	controller->iommu = ioc;
	sac->sysdata = controller;
	sac->dma_mask = 0xFFFFFFFFUL;
#ifdef CONFIG_PCI
	sac->dev.bus = &pci_bus_type;
#endif
	ioc->sac_only_dev = sac;
}

static void __init
ioc_zx1_init(struct ioc *ioc)
{
	unsigned long rope_config;
	unsigned int i;

	if (ioc->rev < 0x20)
		panic(PFX "IOC 2.0 or later required for IOMMU support\n");

	/* 38 bit memory controller + extra bit for range displaced by MMIO */
	ioc->dma_mask = (0x1UL << 39) - 1;

	/*
	** Clear ROPE(N)_CONFIG AO bit.
	** Disables "NT Ordering" (~= !"Relaxed Ordering")
	** Overrides bit 1 in DMA Hint Sets.
	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
	*/
	for (i=0; i<(8*8); i+=8) {
		rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i);
		rope_config &= ~IOC_ROPE_AO;
		WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i);
	}
}

typedef void (initfunc)(struct ioc *);

struct ioc_iommu {
	u32 func_id;
	char *name;
	initfunc *init;
};

static struct ioc_iommu ioc_iommu_info[] __initdata = {
	{ ZX1_IOC_ID, "zx1", ioc_zx1_init },
	{ ZX2_IOC_ID, "zx2", NULL },
	{ SX1000_IOC_ID, "sx1000", NULL },
	{ SX2000_IOC_ID, "sx2000", NULL },
};

static struct ioc * __init
ioc_init(u64 hpa, void *handle)
{
	struct ioc *ioc;
	struct ioc_iommu *info;

	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
	if (!ioc)
		return NULL;

	ioc->next = ioc_list;
	ioc_list = ioc;

	ioc->handle = handle;
	ioc->ioc_hpa = ioremap(hpa, 0x1000);

	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */

	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
		if (ioc->func_id == info->func_id) {
			ioc->name = info->name;
			if (info->init)
				(info->init)(ioc);
		}
	}

	iovp_size = (1 << iovp_shift);
	iovp_mask = ~(iovp_size - 1);

	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
		PAGE_SIZE >> 10, iovp_size >> 10);

	if (!ioc->name) {
		ioc->name = kmalloc(24, GFP_KERNEL);
		if (ioc->name)
			sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
				ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
		else
			ioc->name = "Unknown";
	}

	ioc_iova_init(ioc);
	ioc_resource_init(ioc);
	ioc_sac_init(ioc);

	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
		ia64_max_iommu_merge_mask = ~iovp_mask;

	printk(KERN_INFO PFX
		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
		hpa, ioc->iov_size >> 20, ioc->ibase);

	return ioc;
}




#ifdef CONFIG_PROC_FS
static void *
ioc_start(struct seq_file *s, loff_t *pos)
{
	struct ioc *ioc;
	loff_t n = *pos;

	for (ioc = ioc_list; ioc; ioc = ioc->next)
		if (!n--)
			return ioc;

	return NULL;
}

static void *
ioc_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct ioc *ioc = v;

	++*pos;
	return ioc->next;
}

static void
ioc_stop(struct seq_file *s, void *v)
{
}

static int
ioc_show(struct seq_file *s, void *v)
{
	struct ioc *ioc = v;
	unsigned long *res_ptr = (unsigned long *)ioc->res_map;
	int i, used = 0;

	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
#ifdef CONFIG_NUMA
	if (ioc->node != MAX_NUMNODES)
		seq_printf(s, "NUMA node       : %d\n", ioc->node);
#endif
	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
	seq_printf(s, "IOVA page size  : %ld kb\n", iovp_size/1024);

	for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
		used += hweight64(*res_ptr);

	seq_printf(s, "PDIR size       : %d entries\n", ioc->pdir_size >> 3);
	seq_printf(s, "PDIR used       : %d entries\n", used);

#ifdef PDIR_SEARCH_TIMING
	{
		unsigned long i = 0, avg = 0, min, max;
		min = max = ioc->avg_search[0];
		for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
			avg += ioc->avg_search[i];
			if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
			if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
		}
		avg /= SBA_SEARCH_SAMPLE;
		seq_printf(s, "Bitmap search   : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
		           min, avg, max);
	}
#endif
#ifndef ALLOW_IOV_BYPASS
	seq_printf(s, "IOVA bypass disabled\n");
#endif
	return 0;
}

static struct seq_operations ioc_seq_ops = {
	.start = ioc_start,
	.next  = ioc_next,
	.stop  = ioc_stop,
	.show  = ioc_show
};

static int
ioc_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &ioc_seq_ops);
}

static const struct file_operations ioc_fops = {
	.open    = ioc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release
};

static void __init
ioc_proc_init(void)
{
	struct proc_dir_entry *dir, *entry;

	dir = proc_mkdir("bus/mckinley", NULL);
	if (!dir)
		return;

	entry = create_proc_entry(ioc_list->name, 0, dir);
	if (entry)
		entry->proc_fops = &ioc_fops;
}
#endif

static void
sba_connect_bus(struct pci_bus *bus)
{
	acpi_handle handle, parent;
	acpi_status status;
	struct ioc *ioc;

	if (!PCI_CONTROLLER(bus))
		panic(PFX "no sysdata on bus %d!\n", bus->number);

	if (PCI_CONTROLLER(bus)->iommu)
		return;

	handle = PCI_CONTROLLER(bus)->acpi_handle;
	if (!handle)
		return;

	/*
	 * The IOC scope encloses PCI root bridges in the ACPI
	 * namespace, so work our way out until we find an IOC we
	 * claimed previously.
	 */
	do {
		for (ioc = ioc_list; ioc; ioc = ioc->next)
			if (ioc->handle == handle) {
				PCI_CONTROLLER(bus)->iommu = ioc;
				return;
			}

		status = acpi_get_parent(handle, &parent);
		handle = parent;
	} while (ACPI_SUCCESS(status));

	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
}

#ifdef CONFIG_NUMA
static void __init
sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
{
	unsigned int node;
	int pxm;

	ioc->node = MAX_NUMNODES;

	pxm = acpi_get_pxm(handle);

	if (pxm < 0)
		return;

	node = pxm_to_node(pxm);

	if (node >= MAX_NUMNODES || !node_online(node))
		return;

	ioc->node = node;
	return;
}
#else
#define sba_map_ioc_to_node(ioc, handle)
#endif

static int __init
acpi_sba_ioc_add(struct acpi_device *device)
{
	struct ioc *ioc;
	acpi_status status;
	u64 hpa, length;
	struct acpi_buffer buffer;
	struct acpi_device_info *dev_info;

	status = hp_acpi_csr_space(device->handle, &hpa, &length);
	if (ACPI_FAILURE(status))
		return 1;

	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
	status = acpi_get_object_info(device->handle, &buffer);
	if (ACPI_FAILURE(status))
		return 1;
	dev_info = buffer.pointer;

	/*
	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
	 * root bridges, and its CSR space includes the IOC function.
	 */
	if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
		hpa += ZX1_IOC_OFFSET;
		/* zx1 based systems default to kernel page size iommu pages */
		if (!iovp_shift)
			iovp_shift = min(PAGE_SHIFT, 16);
	}
	kfree(dev_info);

	/*
	 * default anything not caught above or specified on cmdline to 4k
	 * iommu page size
	 */
	if (!iovp_shift)
		iovp_shift = 12;

	ioc = ioc_init(hpa, device->handle);
	if (!ioc)
		return 1;

	/* setup NUMA node association */
	sba_map_ioc_to_node(ioc, device->handle);
	return 0;
}

static struct acpi_driver acpi_sba_ioc_driver = {
	.name		= "IOC IOMMU Driver",
	.ids		= "HWP0001,HWP0004",
	.ops		= {
		.add	= acpi_sba_ioc_add,
	},
};

static int __init
sba_init(void)
{
	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
		return 0;

	acpi_bus_register_driver(&acpi_sba_ioc_driver);
	if (!ioc_list) {
#ifdef CONFIG_IA64_GENERIC
		extern int swiotlb_late_init_with_default_size (size_t size);

		/*
		 * If we didn't find something sba_iommu can claim, we
		 * need to setup the swiotlb and switch to the dig machvec.
		 */
		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
			panic("Unable to find SBA IOMMU or initialize "
			      "software I/O TLB: Try machvec=dig boot option");
		machvec_init("dig");
#else
		panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
#endif
		return 0;
	}

#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
	/*
	 * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
	 * buffer setup to support devices with smaller DMA masks than
	 * sba_iommu can handle.
	 */
	if (ia64_platform_is("hpzx1_swiotlb")) {
		extern void hwsw_init(void);

		hwsw_init();
	}
#endif

#ifdef CONFIG_PCI
	{
		struct pci_bus *b = NULL;
		while ((b = pci_find_next_bus(b)) != NULL)
			sba_connect_bus(b);
	}
#endif

#ifdef CONFIG_PROC_FS
	ioc_proc_init();
#endif
	return 0;
}

subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */

static int __init
nosbagart(char *str)
{
	reserve_sba_gart = 0;
	return 1;
}

int
sba_dma_supported (struct device *dev, u64 mask)
{
	/* make sure it's at least 32bit capable */
	return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
}

int
sba_dma_mapping_error (dma_addr_t dma_addr)
{
	return 0;
}

__setup("nosbagart", nosbagart);

static int __init
sba_page_override(char *str)
{
	unsigned long page_size;

	page_size = memparse(str, &str);
	switch (page_size) {
		case 4096:
		case 8192:
		case 16384:
		case 65536:
			iovp_shift = ffs(page_size) - 1;
			break;
		default:
			printk("%s: unknown/unsupported iommu page size %ld\n",
			       __FUNCTION__, page_size);
	}

	return 1;
}

__setup("sbapagesize=",sba_page_override);

EXPORT_SYMBOL(sba_dma_mapping_error);
EXPORT_SYMBOL(sba_map_single);
EXPORT_SYMBOL(sba_unmap_single);
EXPORT_SYMBOL(sba_map_sg);
EXPORT_SYMBOL(sba_unmap_sg);
EXPORT_SYMBOL(sba_dma_supported);
EXPORT_SYMBOL(sba_alloc_coherent);
EXPORT_SYMBOL(sba_free_coherent);