/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32bit addresses on
 * systems with more than 4GB of memory.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * $Id: pci-gart.c,v 1.1.1.1 2008/10/15 03:26:21 james26_jang Exp $
 */

/*
 * Notebook:
 *
 * agpgart_be
 *	check if the simple reservation scheme is enough.
 *
 * possible future tuning:
 *	fast path for sg streaming mappings
 *	more intelligent flush strategy - flush only a single NB?
 *	move boundary between IOMMU and AGP in GART dynamically
 *	could use exact fit in the gart in alloc_consistent, not order of two.
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include "pci-x86_64.h"

unsigned long iommu_bus_base;		/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;			/* Remapping table */

int no_iommu;
static int no_agp;
int force_mmu = 1;

extern int fallback_aper_order;
extern int fallback_aper_force;

/* Allocation bitmap for the remapping area */
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

#define GPTE_MASK 0xfffffff000
#define GPTE_VALID 1
#define GPTE_COHERENT 2
/*
 * A GART PTE keeps physical address bits 31:12 in place and folds bits
 * 39:32 into PTE bits 11:4.  x must be page aligned here; shifting bits
 * 39:32 down with ">> 32" and back up with "<< 4" keeps physical bits
 * 31:28 from leaking into the flag bits.
 */
#define GPTE_ENCODE(x,flag) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | (flag))
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
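/*
 * Worked example of the GPTE layout (for illustration only, nothing below
 * uses these numbers):
 *
 *	phys = 0x1234567000			(40-bit, page aligned)
 *	GPTE_ENCODE(phys, GPTE_COHERENT)
 *	  = (phys & 0xfffff000)			-> 0x34567000
 *	  | ((phys >> 32) << 4)			-> 0x00000120
 *	  | GPTE_VALID | GPTE_COHERENT		-> 0x00000003
 *	  = 0x34567123				(fits the u32 GATT entry)
 *	GPTE_DECODE(0x34567123)
 *	  = 0x34567000 | ((u64)0x120 << 28)	= 0x1234567000
 */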
/* Iterate over the AMD northbridges (bus 0, slots 24-31, function 3) */
#define for_all_nb(dev) \
	pci_for_each_dev(dev) \
		if (dev->bus->number == 0 && PCI_FUNC(dev->devfn) == 3 && \
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
extern int agp_init(void);
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit; /* protected by iommu_bitmap_lock */

static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);

	/* Next-fit: search from the last allocation first, then wrap around. */
	offset = find_next_zero_string(iommu_gart_bitmap, next_bit, iommu_pages, size);
	if (offset == -1)
		offset = find_next_zero_string(iommu_gart_bitmap, 0, next_bit, size);
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset + size;
		if (next_bit >= iommu_pages)
			next_bit = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}

static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	clear_bit_string(iommu_gart_bitmap, offset, size);
	next_bit = offset;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

/*
 * Flush the GART TLB of every northbridge by setting the invalidate bit
 * in the GART cache control register (0x9c).
 */
static inline void flush_gart(void)
{
	struct pci_dev *nb;
	for_all_nb(nb) {
		u32 flag;
		pci_read_config_dword(nb, 0x9c, &flag); /* could cache this */
		/* could complain for PTE walk errors here (bit 1 of flag) */
		flag |= 1;
		pci_write_config_dword(nb, 0x9c, flag);
	}
}

void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
			   dma_addr_t *dma_handle)
{
	void *memory;
	int gfp = GFP_ATOMIC;
	int order, i;
	unsigned long iommu_page;

	if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu)
		gfp |= GFP_DMA;

	/*
	 * First try to allocate continuous and use directly if already
	 * in lowmem.
	 */
	order = get_order(size);
	memory = (void *)__get_free_pages(gfp, order);
	if (memory == NULL) {
		return NULL;
	} else {
		int high = (unsigned long)virt_to_bus(memory) + size
			>= 0xffffffff;
		int mmu = high;
		if (force_mmu)
			mmu = 1;
		if (no_iommu) {
			if (high)
				goto error;
			mmu = 0;
		}
		memset(memory, 0, size);
		if (!mmu) {
			*dma_handle = virt_to_bus(memory);
			return memory;
		}
	}

	iommu_page = alloc_iommu(1<<order);
	if (iommu_page == -1)
		goto error;

	/* Fill in the GATT, allocating pages as needed. */
	for (i = 0; i < 1<<order; i++) {
		unsigned long phys_mem;
		void *mem = memory + i*PAGE_SIZE;
		if (i > 0)
			atomic_inc(&virt_to_page(mem)->count);
		phys_mem = virt_to_phys(mem);
		BUG_ON(phys_mem & ~PTE_MASK);
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem, GPTE_COHERENT);
	}

	flush_gart();
	*dma_handle = iommu_bus_base + (iommu_page << PAGE_SHIFT);
	return memory;

 error:
	free_pages((unsigned long)memory, order);
	return NULL;
}

/*
 * Unmap consistent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
void pci_free_consistent(struct pci_dev *hwdev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	u64 pte;
	int order = get_order(size);
	unsigned long iommu_page;
	int i;

	if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) {
		free_pages((unsigned long)vaddr, order);
		return;
	}
	iommu_page = (bus - iommu_bus_base) / PAGE_SIZE;
	for (i = 0; i < 1<<order; i++) {
		pte = iommu_gatt_base[iommu_page + i];
		BUG_ON((pte & GPTE_VALID) == 0);
		iommu_gatt_base[iommu_page + i] = 0;
		free_page((unsigned long)__va(GPTE_DECODE(pte)));
	}
	flush_gart();
	free_iommu(iommu_page, 1<<order);
}
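/*
 * Typical driver use of the consistent API above, e.g. for a descriptor
 * ring (an illustrative sketch only; "mydev" and RING_BYTES are made up):
 *
 *	dma_addr_t ring_bus;
 *	void *ring;
 *
 *	ring = pci_alloc_consistent(mydev, RING_BYTES, &ring_bus);
 *	if (ring == NULL)
 *		return -ENOMEM;
 *	... the CPU uses "ring", the device DMAs to/from "ring_bus" ...
 *	pci_free_consistent(mydev, RING_BYTES, ring, ring_bus);
 */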
#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_dumppages = 20;
void dump_leak(void)
{
	int i;
	static int dump;
	if (dump || !iommu_leak_tab)
		return;
	dump = 1;
	show_stack(NULL);
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_dumppages);
	/* Start at 1: the last valid entry is iommu_leak_tab[iommu_pages-1],
	   so this never indexes past the table. */
	for (i = 1; i <= iommu_leak_dumppages; i++)
		printk("[%lu: %lx] ",
		       iommu_pages-i, (unsigned long)iommu_leak_tab[iommu_pages-i]);
	printk("\n");
}
#endif

static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	       "PCI-DMA: Error: ran out of IOMMU space for %p size %lu at device %s[%s]\n",
	       addr, size, dev ? dev->name : "?", dev ? dev->slot_name : "?");

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory will be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory will be DMAed\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}

static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = (~mask & (unsigned long)(addr + size)) != 0;
	int mmu = high;
	if (force_mmu)
		mmu = 1;
	if (no_iommu) {
		if (high)
			panic("pci_map_single: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}

dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir)
{
	unsigned long iommu_page;
	unsigned long phys_mem, bus;
	int i, npages;

	BUG_ON(dir == PCI_DMA_NONE);

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	/* Include the offset into the first page so a mapping that crosses
	   a page boundary gets enough GART pages. */
	npages = round_up(size + (phys_mem & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT;

	iommu_page = alloc_iommu(npages);
	if (iommu_page == -1) {
		iommu_full(dev, addr, size, dir);
		return iommu_bus_base;
	}

	phys_mem &= PAGE_MASK;
	for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) {
		BUG_ON(phys_mem & ~PTE_MASK);

		/*
		 * Set coherent mapping here to avoid needing to flush
		 * the caches on mapping.
		 */
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem, GPTE_COHERENT);

#ifdef CONFIG_IOMMU_LEAK
		if (iommu_leak_tab)
			iommu_leak_tab[iommu_page + i] = __builtin_return_address(0);
#endif
	}
	flush_gart();

	bus = iommu_bus_base + iommu_page*PAGE_SIZE;
	return bus + ((unsigned long)addr & ~PAGE_MASK);
}

/*
 * Free a temporary PCI mapping.
 */
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
		      size_t size, int direction)
{
	unsigned long iommu_page;
	int i, npages;
	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	/* Must mirror the npages computation in pci_map_single. */
	npages = round_up(size + (dma_addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = 0;
#ifdef CONFIG_IOMMU_LEAK
		if (iommu_leak_tab)
			iommu_leak_tab[iommu_page + i] = 0;
#endif
	}
	flush_gart();
	free_iommu(iommu_page, npages);
}

EXPORT_SYMBOL(pci_map_single);
EXPORT_SYMBOL(pci_unmap_single);
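/*
 * Typical streaming use of the two exports above, e.g. for a network
 * transmit buffer (an illustrative sketch only; "dev", "buf" and "len"
 * are made up):
 *
 *	dma_addr_t bus;
 *
 *	bus = pci_map_single(dev, buf, len, PCI_DMA_TODEVICE);
 *	if (bus == bad_dma_address)
 *		goto drop;	(IOMMU full, the mapping is not valid)
 *	... hand "bus" to the device and wait for the DMA to finish ...
 *	pci_unmap_single(dev, bus, len, PCI_DMA_TODEVICE);
 */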
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;
	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}

	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
		       "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",
		       iommu_size>>20);

	return iommu_size;
}

static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	/* The aperture base register holds address bits 39:25. */
	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
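/*
 * Worked example of the aperture decode above (register values made up
 * for illustration): with config reg 0x90 = 0x0005 and reg 0x94 = 0x0040,
 *
 *	aper_order = (0x0005 >> 1) & 7 = 2
 *	aper_size  = 32MB << 2 = 128MB
 *	aper_base  = (0x0040 & 0x7fff) << 25 = 0x80000000 (2GB)
 *
 * 0x80000000 + 128MB stays below 4GB, so the aperture is accepted.
 */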
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	aper_size = aper_base = info->aper_size = 0;
	/* All northbridges must report the same aperture. */
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size>>20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	change_page_attr(virt_to_page(gatt), gatt_size/PAGE_SIZE, PAGE_KERNEL_NOCACHE);
	agp_gatt_table = gatt;

	for_all_nb(dev) {
		u32 ctl;
		u32 gatt_reg;

		/* The GATT base register wants the physical address. */
		gatt_reg = __pa(gatt) >> 12;
		gatt_reg <<= 4;
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		/* Enable the GART and let both CPU and IO accesses
		   go through the aperture. */
		ctl |= 1;
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}
	flush_gart();

	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
	       aper_base, aper_size>>10);
	return 0;

 nommu:
	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
	return -1;
}

void __init pci_iommu_init(void)
{
	agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;

#ifndef CONFIG_AGP
	no_agp = 1;
#else
	no_agp = no_agp || (agp_init() < 0) || (agp_copy_info(&info) < 0);
#endif

	if (no_iommu || (!force_mmu && end_pfn < 0xffffffff>>PAGE_SHIFT)) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return;
	}

	if (no_agp) {
		int err = -1;
		printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
		if (force_mmu || end_pfn >= 0xffffffff>>PAGE_SHIFT)
			err = init_k8_gatt(&info);
		if (err < 0) {
			printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
			no_iommu = 1;
			return;
		}
	}

	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void *)__get_free_pages(GFP_KERNEL,
						     get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
					get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * sizeof(void *));
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	/* The IOMMU takes the end of the aperture; AGP keeps the start. */
	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
	bad_dma_address = iommu_bus_base;

	/* Flush all CPU caches so stale data cannot alias the new
	   coherent mappings. */
	asm volatile("wbinvd" ::: "memory");
}

/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]]
   size    set size of iommu (in bytes)
   noagp   don't initialize the AGP driver and use full aperture.
   off     don't use the IOMMU
   force   always remap through the IOMMU, even for low addresses.
   noforce only remap when the address doesn't fit the device's DMA mask.
   leak    turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
   memaper[=order] allocate a private aperture over RAM of size 32MB << order.
*/
__init int iommu_setup(char *opt)
{
	int arg;
	char *p = opt;

	for (;;) {
		if (!memcmp(p, "noagp", 5))
			no_agp = 1;
		if (!memcmp(p, "off", 3))
			no_iommu = 1;
		if (!memcmp(p, "force", 5))
			force_mmu = 1;
		if (!memcmp(p, "noforce", 7))
			force_mmu = 0;
		if (!memcmp(p, "memaper", 7)) {
			fallback_aper_force = 1;
			p += 7;
			/* get_option cannot parse starting at '=', so skip it. */
			if (*p == '=') {
				++p;
				if (get_option(&p, &arg))
					fallback_aper_order = arg;
			}
		}
#ifdef CONFIG_IOMMU_LEAK
		if (!memcmp(p, "leak", 4))
			leak_trace = 1;
#endif
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_size = arg;
		/* Skip to the next comma-separated option. */
		do {
			if (*p == ' ' || *p == 0)
				return 0;
		} while (*p++ != ',');
	}
	return 1;
}
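/*
 * Example boot lines for the options parsed above (illustrative only;
 * "leak" additionally requires CONFIG_IOMMU_LEAK):
 *
 *	iommu=off		don't use the GART IOMMU at all
 *	iommu=force,leak	always remap and trace unfreed mappings
 *	iommu=67108864		use a 64MB IOMMU area
 *	iommu=memaper=2		fall back to a 32MB << 2 = 128MB aperture over RAM
 */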