/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows PCI devices that only support 32bit addresses to be used on
 * systems with more than 4GB of memory.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * $Id: pci-gart.c,v 1.1.1.1 2008/10/15 03:26:21 james26_jang Exp $
 */

/*
 * Notebook:

agpgart_be
 check if the simple reservation scheme is enough.

possible future tuning:
 fast path for sg streaming mappings
 more intelligent flush strategy - flush only a single NB?
 move boundary between IOMMU and AGP in GART dynamically
 could use exact fit in the gart in alloc_consistent, not order of two.
*/

#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include "pci-x86_64.h"

unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size; 	/* size of remapping area bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base; 		/* Remapping table */

int no_iommu;
static int no_agp;
int force_mmu = 1;

extern int fallback_aper_order;
extern int fallback_aper_force;

/* Allocation bitmap for the remapping area */
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

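/*
 * GART PTE format (see GPTE_ENCODE/GPTE_DECODE below): bits 31:12 hold
 * physical address bits 31:12, bits 11:4 hold physical address bits 39:32,
 * bit 1 marks the page coherent and bit 0 valid. For example, encoding the
 * page at physical address 0x123456000 gives a PTE of 0x23456013 (with the
 * coherent flag), which GPTE_DECODE maps back to 0x123456000.
 */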
#define GPTE_MASK 0xfffffff000
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x,flag) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | (flag))
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

#define for_all_nb(dev) \
	pci_for_each_dev(dev) \
		if (dev->bus->number == 0 && PCI_FUNC(dev->devfn) == 3 && \
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
extern int agp_init(void);
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;  /* protected by iommu_bitmap_lock */

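/*
 * Allocate a contiguous range of 'size' pages in the GART. This is a
 * simple rotating first-fit search over the allocation bitmap: start at
 * next_bit, wrap around once, and return the page offset or -1 when the
 * aperture is full.
 */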
static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);

	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
	if (offset == -1)
		offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset+size;
		if (next_bit >= iommu_pages)
			next_bit = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}

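/*
 * Return a range of GART pages to the bitmap and restart the rotating
 * search at the freed offset.
 */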
static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	clear_bit_string(iommu_gart_bitmap, offset, size);
	next_bit = offset;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

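/*
 * Flush the GART TLB on every northbridge by setting the invalidate bit
 * in the GART cache control register (config offset 0x9c). New mappings
 * must not be handed to a device before this has been done.
 */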
static inline void flush_gart(void)
{
	struct pci_dev *nb;
	for_all_nb(nb) {
		u32 flag;
		pci_read_config_dword(nb, 0x9c, &flag); /* could cache this */
		/* could complain for PTE walk errors here (bit 1 of flag) */
		flag |= 1;
		pci_write_config_dword(nb, 0x9c, flag);
	}
}

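/*
 * Allocate DMA-consistent memory. Buffers the device can address directly
 * are returned as-is; otherwise (or when force_mmu is set) the pages are
 * also mapped through the GART and the aperture address is returned as the
 * dma handle.
 */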
void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
			   dma_addr_t *dma_handle)
{
	void *memory;
	int gfp = GFP_ATOMIC;
	int order, i;
	unsigned long iommu_page;

	if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu)
		gfp |= GFP_DMA;

	/*
	 * First try a contiguous allocation and use it directly if it
	 * is already in lowmem.
	 */
	order = get_order(size);
	memory = (void *)__get_free_pages(gfp, order);
	if (memory == NULL) {
		return NULL;
	} else {
		int high = (unsigned long)virt_to_bus(memory) + size
			>= 0xffffffff;
		int mmu = high;
		if (force_mmu)
			mmu = 1;
		if (no_iommu) {
			if (high) goto error;
			mmu = 0;
		}
		memset(memory, 0, size);
		if (!mmu) {
			*dma_handle = virt_to_bus(memory);
			return memory;
		}
	}

	iommu_page = alloc_iommu(1<<order);
	if (iommu_page == -1)
		goto error;

	/*
	 * Fill in the GATT. Bump the refcount of each additional page so
	 * that pci_free_consistent can free the pages individually later.
	 */
	for (i = 0; i < 1<<order; i++) {
		unsigned long phys_mem;
		void *mem = memory + i*PAGE_SIZE;
		if (i > 0)
			atomic_inc(&virt_to_page(mem)->count);
		phys_mem = virt_to_phys(mem);
		BUG_ON(phys_mem & ~PTE_MASK);
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem,GPTE_COHERENT);
	}

	flush_gart();
	*dma_handle = iommu_bus_base + (iommu_page << PAGE_SHIFT);
	return memory;

 error:
	free_pages((unsigned long)memory, order);
	return NULL;
}

/*
 * Unmap consistent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
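/*
 * Addresses outside the remapping window were handed out directly by
 * pci_alloc_consistent; those only need their pages freed.
 */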
void pci_free_consistent(struct pci_dev *hwdev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	u64 pte;
	int order = get_order(size);
	unsigned long iommu_page;
	int i;

	if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) {
		free_pages((unsigned long)vaddr, order);
		return;
	}
	iommu_page = (bus - iommu_bus_base) / PAGE_SIZE;
	for (i = 0; i < 1<<order; i++) {
		pte = iommu_gatt_base[iommu_page + i];
		BUG_ON((pte & GPTE_VALID) == 0);
		iommu_gatt_base[iommu_page + i] = 0;
		free_page((unsigned long) __va(GPTE_DECODE(pte)));
	}
	flush_gart();
	free_iommu(iommu_page, 1<<order);
}

#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_dumppages = 20;
void dump_leak(void)
{
	int i;
	static int dump;
	if (dump || !iommu_leak_tab) return;
	dump = 1;
	show_stack(NULL);
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_dumppages);
	for (i = 0; i < iommu_leak_dumppages; i++)
		printk("[%lu: %lx] ",
		       iommu_pages-1-i, (unsigned long) iommu_leak_tab[iommu_pages-1-i]);
	printk("\n");
}
#endif

static void iommu_full(struct pci_dev *dev, void *addr, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
  "PCI-DMA: Error: ran out of IOMMU space for %p size %lu at device %s[%s]\n",
	       addr,size, dev ? dev->name : "?", dev ? dev->slot_name : "?");

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory will be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory will be DMAed\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}

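/*
 * Decide whether a mapping has to be remapped through the IOMMU: either
 * the buffer is not reachable with the device's dma_mask, or force_mmu
 * requests remapping for everything.
 */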
static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t size)
{
	u64 mask = dev ? dev->dma_mask : 0xffffffff;
	int high = (~mask & (unsigned long)(addr + size)) != 0;
	int mmu = high;
	if (force_mmu)
		mmu = 1;
	if (no_iommu) {
		if (high)
			panic("pci_map_single: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}

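/*
 * Map a single buffer for streaming DMA. Low buffers are passed through
 * untranslated; everything else gets GART pages allocated, the GATT filled
 * in, and the aperture address plus the offset within the first page
 * returned as the bus address.
 */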
dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
{
	unsigned long iommu_page;
	unsigned long phys_mem, bus;
	int i, npages;

	BUG_ON(dir == PCI_DMA_NONE);

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	npages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;

	iommu_page = alloc_iommu(npages);
	if (iommu_page == -1) {
		iommu_full(dev, addr, size, dir);
		return iommu_bus_base;
	}

	phys_mem &= PAGE_MASK;
	for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) {
		BUG_ON(phys_mem & ~PTE_MASK);

		/*
		 * Set coherent mapping here to avoid needing to flush
		 * the caches on mapping.
		 */
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem, GPTE_COHERENT);

#ifdef CONFIG_IOMMU_LEAK
		if (iommu_leak_tab)
			iommu_leak_tab[iommu_page + i] = __builtin_return_address(0);
#endif
	}
	flush_gart();

	bus = iommu_bus_base + iommu_page*PAGE_SIZE;
	return bus + ((unsigned long)addr & ~PAGE_MASK);
}

/*
 * Free a temporary PCI mapping.
 */
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
		      size_t size, int direction)
{
	unsigned long iommu_page;
	int i, npages;
	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr > iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = 0;
#ifdef CONFIG_IOMMU_LEAK
		if (iommu_leak_tab)
			iommu_leak_tab[iommu_page + i] = 0;
#endif
	}
	flush_gart();
	free_iommu(iommu_page, npages);
}

EXPORT_SYMBOL(pci_map_single);
EXPORT_SYMBOL(pci_unmap_single);

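/*
 * Work out how much of the aperture to use for the IOMMU: the whole
 * aperture by default, or half of it when the AGP driver also needs the
 * GART, adjusted against the kernel's large page mapping.
 */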
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;
	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}

	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
  "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20);

	return iommu_size;
}

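/*
 * Read the aperture base and size from a northbridge. Config offset 0x90
 * holds the aperture order (size = 32MB << order) and offset 0x94 the
 * aperture base in 32MB units; apertures that are missing or would reach
 * beyond 4GB are rejected.
 */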
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}

/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	aper_size = aper_base = info->aper_size = 0;
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size>>20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	change_page_attr(virt_to_page(gatt), gatt_size/PAGE_SIZE, PAGE_KERNEL_NOCACHE);
	agp_gatt_table = gatt;

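	/*
	 * Point every northbridge at the new GATT and enable translation.
	 * Per the K8 northbridge register layout, config offset 0x98 is the
	 * GART table base register (physical address bits 39:12 go into
	 * bits 31:4) and offset 0x90 the aperture control register: bit 0
	 * enables the GART, bits 4 and 5 (cleared here) would disable CPU
	 * and IO accesses through the aperture.
	 */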
	for_all_nb(dev) {
		u32 ctl;
		u32 gatt_reg;

		gatt_reg = ((u64)__pa(gatt)) >> 12;
		gatt_reg <<= 4;
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		ctl |= 1;
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}
	flush_gart();

	printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10);
	return 0;

 nommu:
	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.");
	return -1;
}

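/*
 * Set up the GART IOMMU at boot: find (or build) a GATT, decide how much
 * of the aperture to take over from AGP, allocate the allocation bitmap
 * and reserve the emergency pages at the start of the remapping area.
 */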
void __init pci_iommu_init(void)
{
	agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;

#ifndef CONFIG_AGP
	no_agp = 1;
#else
	no_agp = no_agp || (agp_init() < 0) || (agp_copy_info(&info) < 0);
#endif

	if (no_iommu || (!force_mmu && end_pfn < 0xffffffff>>PAGE_SHIFT)) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return;
	}

	if (no_agp) {
		int err = -1;
		printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
		no_agp = 1;
		if (force_mmu || end_pfn >= 0xffffffff>>PAGE_SHIFT)
			err = init_k8_gatt(&info);
		if (err < 0) {
			printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
			no_iommu = 1;
			return;
		}
	}

	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL,
						    get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				  get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * 8);
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
	bad_dma_address = iommu_bus_base;

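	/*
	 * Flush the CPU caches so that GATT entries written through
	 * cacheable mappings are visible to the GART before any device
	 * uses the aperture.
	 */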
	asm volatile("wbinvd" ::: "memory");
}

/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]]
   size  set size of iommu (in bytes)
   noagp don't initialize the AGP driver and use full aperture.
   off   don't use the IOMMU
   force always remap through the IOMMU, even for buffers the device could
         reach directly
   noforce only remap through the IOMMU when a device actually needs it
   leak  turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
   memaper[=order] allocate an own aperture over RAM with size 32MB << order.
*/
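/*
 * For example, booting with "iommu=force,noagp" remaps every PCI mapping
 * through the GART and skips AGP driver initialization, while
 * "iommu=noforce,16777216" limits the remapping area to 16MB and only uses
 * it for buffers the device cannot reach directly.
 */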
__init int iommu_setup(char *opt)
{
    int arg;
    char *p = opt;

    for (;;) {
	    if (!memcmp(p,"noagp", 5))
		    no_agp = 1;
	    if (!memcmp(p,"off", 3))
		    no_iommu = 1;
	    if (!memcmp(p,"force", 5))
		    force_mmu = 1;
	    if (!memcmp(p,"noforce", 7))
		    force_mmu = 0;
	    if (!memcmp(p, "memaper", 7)) {
		    fallback_aper_force = 1;
		    p += 7;
		    if (*p == '=' && get_option(&p, &arg))
			    fallback_aper_order = arg;
	    }
#ifdef CONFIG_IOMMU_LEAK
	    if (!memcmp(p,"leak", 4))
		    leak_trace = 1;
#endif
	    if (isdigit(*p) && get_option(&p, &arg))
		    iommu_size = arg;
	    do {
		    if (*p == ' ' || *p == 0)
			    return 0;
	    } while (*p++ != ',');
    }
    return 1;
}
