/netgear-R7000-V1.0.7.12_1.2.5/components/opensource/linux/linux-2.6.36/arch/x86/mm/
/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

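/*
 * Check whether the candidate mapping "svma" can share a pmd page with
 * "vma" for the huge page at "addr".  Returns the address in svma that
 * maps the same file offset, or 0 if sharing is not possible.
 */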
static unsigned long page_table_shareable(struct vm_area_struct *svma,
				struct vm_area_struct *vma,
				unsigned long addr, pgoff_t idx)
{
	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
				svma->vm_start;
	unsigned long sbase = saddr & PUD_MASK;
	unsigned long s_end = sbase + PUD_SIZE;

	/* Allow segments to share if only one is marked locked */
	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;

	/*
	 * match the virtual addresses, permission and the alignment of the
	 * page table page.
	 */
	if (pmd_index(addr) != pmd_index(saddr) ||
	    vm_flags != svm_flags ||
	    sbase < svma->vm_start || svma->vm_end < s_end)
		return 0;

	return saddr;
}

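/*
 * Sharing is only worthwhile when the mapping is VM_MAYSHARE and fully
 * covers the PUD_SIZE-aligned region around addr.
 */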
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long base = addr & PUD_MASK;
	unsigned long end = base + PUD_SIZE;

	/*
	 * check on proper vm_flags and page table alignment
	 */
	if (vma->vm_flags & VM_MAYSHARE &&
	    vma->vm_start <= base && end <= vma->vm_end)
		return 1;
	return 0;
}

/*
 * Search for a shareable pmd page for hugetlb.  Walk the other mappings of
 * the same file and, if one already has a pmd page covering the same
 * PUD-sized range, take a reference on that page and install it in our pud
 * instead of allocating a new one.
 */
static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
	struct vm_area_struct *vma = find_vma(mm, addr);
	struct address_space *mapping = vma->vm_file->f_mapping;
	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
			vma->vm_pgoff;
	struct prio_tree_iter iter;
	struct vm_area_struct *svma;
	unsigned long saddr;
	pte_t *spte = NULL;

	if (!vma_shareable(vma, addr))
		return;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

		saddr = page_table_shareable(svma, vma, addr, idx);
		if (saddr) {
			spte = huge_pte_offset(svma->vm_mm, saddr);
			if (spte) {
				get_page(virt_to_page(spte));
				break;
			}
		}
	}

	if (!spte)
		goto out;

	spin_lock(&mm->page_table_lock);
	if (pud_none(*pud))
		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
	else
		put_page(virt_to_page(spte));
	spin_unlock(&mm->page_table_lock);
out:
	spin_unlock(&mapping->i_mmap_lock);
}

/*
 * unmap huge page backed by shared pte.
 *
 * Hugetlb pte page is ref counted at the time of mapping.  If pte is shared
 * indicated by page_count > 1, unmap is achieved by clearing pud and
 * decrementing the ref count. If count == 1, the pte page is not shared.
 *
 * called with vma->vm_mm->page_table_lock held.
 *
 * returns: 1 successfully unmapped a shared pte page
 *	    0 the underlying pte page is not shared, or it is the last user
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	pgd_t *pgd = pgd_offset(mm, *addr);
	pud_t *pud = pud_offset(pgd, *addr);

	BUG_ON(page_count(virt_to_page(ptep)) == 0);
	if (page_count(virt_to_page(ptep)) == 1)
		return 0;

	pud_clear(pud);
	put_page(virt_to_page(ptep));
	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
	return 1;
}

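/*
 * Allocate (or look up) the page-table entry that will map a huge page of
 * size "sz" at "addr".  A PUD_SIZE page is mapped directly by a pud entry;
 * a PMD_SIZE page needs a pmd page, which we first try to share with other
 * mappings of the same file via huge_pmd_share().
 */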
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (pud) {
		if (sz == PUD_SIZE) {
			pte = (pte_t *)pud;
		} else {
			BUG_ON(sz != PMD_SIZE);
			if (pud_none(*pud))
				huge_pmd_share(mm, addr, pud);
			pte = (pte_t *) pmd_alloc(mm, pud, addr);
		}
	}
	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));

	return pte;
}

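/*
 * Look up the page-table entry for a huge page at "addr" without allocating
 * anything.  Returns the pud itself for a PUD_SIZE page, the pmd for a
 * PMD_SIZE page, or NULL if the upper page-table levels are not populated.
 */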
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd_present(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (pud_present(*pud)) {
			if (pud_large(*pud))
				return (pte_t *)pud;
			pmd = pmd_offset(pud, addr);
		}
	}
	return (pte_t *) pmd;
}

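/*
 * On x86 huge pages are found by the normal page-table walk in
 * follow_page(), so follow_huge_addr() simply reports failure here.
 */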
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	return ERR_PTR(-EINVAL);
}

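/*
 * A pmd or pud maps a huge page when its PSE (page size) bit is set.
 */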
int pmd_huge(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_PSE);
}

int pud_huge(pud_t pud)
{
	return !!(pud_val(pud) & _PAGE_PSE);
}

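/*
 * Return the struct page for the base page at "address" within a huge page
 * mapped by the given pmd (or, below, pud).
 */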
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pmd);
	if (page)
		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
	return page;
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pud);
	if (page)
		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
	return page;
}

/* x86_64 also uses this file */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
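/*
 * Bottom-up search for a free, huge-page-aligned range of "len" bytes,
 * starting from the cached free-area hint (or TASK_UNMAPPED_BASE) and
 * walking upward through the gaps between VMAs.
 */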
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > mm->cached_hole_size) {
		start_addr = mm->free_area_cache;
	} else {
		start_addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	addr = ALIGN(start_addr, huge_page_size(h));

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = ALIGN(vma->vm_end, huge_page_size(h));
	}
}

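/*
 * Top-down variant: search downward from mm->mmap_base for a gap large
 * enough for the mapping, falling back to the bottom-up search if the
 * region below the base is exhausted.
 */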
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr0, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev_vma;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	if (len <= largest_hole) {
		largest_hole = 0;
		mm->free_area_cache = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & huge_page_mask(h);
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr + len <= vma->vm_start &&
		    (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & huge_page_mask(h);
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
			len, pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

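/*
 * Arch hook for hugetlb mmap(): validate the length, honour MAP_FIXED and
 * address hints, then pick the bottom-up or top-down search to match the
 * mm's normal get_unmapped_area layout.
 */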
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/

#ifdef CONFIG_X86_64
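/*
 * Parse the "hugepagesz=" boot parameter: register a 2MB hstate for
 * PMD_SIZE, or a 1GB hstate for PUD_SIZE when the CPU supports GB pages.
 */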
static __init int setup_hugepagesz(char *opt)
{
	unsigned long ps = memparse(opt, &opt);
	if (ps == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (ps == PUD_SIZE && cpu_has_gbpages) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
			ps >> 20);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
#endif