1/* 2 * linux/arch/i386/mm/pgtable.c 3 */ 4 5#include <linux/sched.h> 6#include <linux/kernel.h> 7#include <linux/errno.h> 8#include <linux/mm.h> 9#include <linux/swap.h> 10#include <linux/smp.h> 11#include <linux/highmem.h> 12#include <linux/slab.h> 13#include <linux/pagemap.h> 14#include <linux/spinlock.h> 15#include <linux/module.h> 16#include <linux/quicklist.h> 17 18#include <asm/system.h> 19#include <asm/pgtable.h> 20#include <asm/pgalloc.h> 21#include <asm/fixmap.h> 22#include <asm/e820.h> 23#include <asm/tlb.h> 24#include <asm/tlbflush.h> 25 26void show_mem(void) 27{ 28 int total = 0, reserved = 0; 29 int shared = 0, cached = 0; 30 int highmem = 0; 31 struct page *page; 32 pg_data_t *pgdat; 33 unsigned long i; 34 unsigned long flags; 35 36 printk(KERN_INFO "Mem-info:\n"); 37 show_free_areas(); 38 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 39 for_each_online_pgdat(pgdat) { 40 pgdat_resize_lock(pgdat, &flags); 41 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 42 page = pgdat_page_nr(pgdat, i); 43 total++; 44 if (PageHighMem(page)) 45 highmem++; 46 if (PageReserved(page)) 47 reserved++; 48 else if (PageSwapCache(page)) 49 cached++; 50 else if (page_count(page)) 51 shared += page_count(page) - 1; 52 } 53 pgdat_resize_unlock(pgdat, &flags); 54 } 55 printk(KERN_INFO "%d pages of RAM\n", total); 56 printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); 57 printk(KERN_INFO "%d reserved pages\n", reserved); 58 printk(KERN_INFO "%d pages shared\n", shared); 59 printk(KERN_INFO "%d pages swap cached\n", cached); 60 61 printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); 62 printk(KERN_INFO "%lu pages writeback\n", 63 global_page_state(NR_WRITEBACK)); 64 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); 65 printk(KERN_INFO "%lu pages slab\n", 66 global_page_state(NR_SLAB_RECLAIMABLE) + 67 global_page_state(NR_SLAB_UNRECLAIMABLE)); 68 printk(KERN_INFO "%lu pages pagetables\n", 69 global_page_state(NR_PAGETABLE)); 70} 71 72/* 73 * Associate a virtual page frame with a given physical page frame 74 * and protection flags for that frame. 75 */ 76static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) 77{ 78 pgd_t *pgd; 79 pud_t *pud; 80 pmd_t *pmd; 81 pte_t *pte; 82 83 pgd = swapper_pg_dir + pgd_index(vaddr); 84 if (pgd_none(*pgd)) { 85 BUG(); 86 return; 87 } 88 pud = pud_offset(pgd, vaddr); 89 if (pud_none(*pud)) { 90 BUG(); 91 return; 92 } 93 pmd = pmd_offset(pud, vaddr); 94 if (pmd_none(*pmd)) { 95 BUG(); 96 return; 97 } 98 pte = pte_offset_kernel(pmd, vaddr); 99 if (pgprot_val(flags)) 100 /* <pfn,flags> stored as-is, to permit clearing entries */ 101 set_pte(pte, pfn_pte(pfn, flags)); 102 else 103 pte_clear(&init_mm, vaddr, pte); 104 105 /* 106 * It's enough to flush this one mapping. 107 * (PGE mappings get flushed as well) 108 */ 109 __flush_tlb_one(vaddr); 110} 111 112/* 113 * Associate a large virtual page frame with a given physical page frame 114 * and protection flags for that frame. pfn is for the base of the page, 115 * vaddr is what the page gets mapped to - both must be properly aligned. 116 * The pmd must already be instantiated. Assumes PAE mode. 117 */ 118void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) 119{ 120 pgd_t *pgd; 121 pud_t *pud; 122 pmd_t *pmd; 123 124 if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ 125 printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); 126 return; /* BUG(); */ 127 } 128 if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ 129 printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); 130 return; /* BUG(); */ 131 } 132 pgd = swapper_pg_dir + pgd_index(vaddr); 133 if (pgd_none(*pgd)) { 134 printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); 135 return; /* BUG(); */ 136 } 137 pud = pud_offset(pgd, vaddr); 138 pmd = pmd_offset(pud, vaddr); 139 set_pmd(pmd, pfn_pmd(pfn, flags)); 140 /* 141 * It's enough to flush this one mapping. 142 * (PGE mappings get flushed as well) 143 */ 144 __flush_tlb_one(vaddr); 145} 146 147static int fixmaps; 148unsigned long __FIXADDR_TOP = 0xfffff000; 149EXPORT_SYMBOL(__FIXADDR_TOP); 150 151void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 152{ 153 unsigned long address = __fix_to_virt(idx); 154 155 if (idx >= __end_of_fixed_addresses) { 156 BUG(); 157 return; 158 } 159 set_pte_pfn(address, phys >> PAGE_SHIFT, flags); 160 fixmaps++; 161} 162 163/** 164 * reserve_top_address - reserves a hole in the top of kernel address space 165 * @reserve - size of hole to reserve 166 * 167 * Can be used to relocate the fixmap area and poke a hole in the top 168 * of kernel address space to make room for a hypervisor. 169 */ 170void reserve_top_address(unsigned long reserve) 171{ 172 BUG_ON(fixmaps > 0); 173 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", 174 (int)-reserve); 175 __FIXADDR_TOP = -reserve - PAGE_SIZE; 176 __VMALLOC_RESERVE += reserve; 177} 178 179pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 180{ 181 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); 182} 183 184struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 185{ 186 struct page *pte; 187 188#ifdef CONFIG_HIGHPTE 189 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); 190#else 191 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); 192#endif 193 return pte; 194} 195 196void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) 197{ 198 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); 199} 200 201/* 202 * List of all pgd's needed for non-PAE so it can invalidate entries 203 * in both cached and uncached pgd's; not needed for PAE since the 204 * kernel pmd is shared. If PAE were not to share the pmd a similar 205 * tactic would be needed. This is essentially codepath-based locking 206 * against pageattr.c; it is the unique case in which a valid change 207 * of kernel pagetables can't be lazily synchronized by vmalloc faults. 208 * vmalloc faults work because attached pagetables are never freed. 209 * -- wli 210 */ 211DEFINE_SPINLOCK(pgd_lock); 212struct page *pgd_list; 213 214static inline void pgd_list_add(pgd_t *pgd) 215{ 216 struct page *page = virt_to_page(pgd); 217 page->index = (unsigned long)pgd_list; 218 if (pgd_list) 219 set_page_private(pgd_list, (unsigned long)&page->index); 220 pgd_list = page; 221 set_page_private(page, (unsigned long)&pgd_list); 222} 223 224static inline void pgd_list_del(pgd_t *pgd) 225{ 226 struct page *next, **pprev, *page = virt_to_page(pgd); 227 next = (struct page *)page->index; 228 pprev = (struct page **)page_private(page); 229 *pprev = next; 230 if (next) 231 set_page_private(next, (unsigned long)pprev); 232} 233 234 235 236#if (PTRS_PER_PMD == 1) 237/* Non-PAE pgd constructor */ 238void pgd_ctor(void *pgd) 239{ 240 unsigned long flags; 241 242 /* !PAE, no pagetable sharing */ 243 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); 244 245 spin_lock_irqsave(&pgd_lock, flags); 246 247 /* must happen under lock */ 248 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 249 swapper_pg_dir + USER_PTRS_PER_PGD, 250 KERNEL_PGD_PTRS); 251 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, 252 __pa(swapper_pg_dir) >> PAGE_SHIFT, 253 USER_PTRS_PER_PGD, 254 KERNEL_PGD_PTRS); 255 pgd_list_add(pgd); 256 spin_unlock_irqrestore(&pgd_lock, flags); 257} 258#else /* PTRS_PER_PMD > 1 */ 259/* PAE pgd constructor */ 260void pgd_ctor(void *pgd) 261{ 262 /* PAE, kernel PMD may be shared */ 263 264 if (SHARED_KERNEL_PMD) { 265 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 266 swapper_pg_dir + USER_PTRS_PER_PGD, 267 KERNEL_PGD_PTRS); 268 } else { 269 unsigned long flags; 270 271 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); 272 spin_lock_irqsave(&pgd_lock, flags); 273 pgd_list_add(pgd); 274 spin_unlock_irqrestore(&pgd_lock, flags); 275 } 276} 277#endif /* PTRS_PER_PMD */ 278 279void pgd_dtor(void *pgd) 280{ 281 unsigned long flags; /* can be called from interrupt context */ 282 283 if (SHARED_KERNEL_PMD) 284 return; 285 286 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); 287 spin_lock_irqsave(&pgd_lock, flags); 288 pgd_list_del(pgd); 289 spin_unlock_irqrestore(&pgd_lock, flags); 290} 291 292#define UNSHARED_PTRS_PER_PGD \ 293 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) 294 295/* If we allocate a pmd for part of the kernel address space, then 296 make sure its initialized with the appropriate kernel mappings. 297 Otherwise use a cached zeroed pmd. */ 298static pmd_t *pmd_cache_alloc(int idx) 299{ 300 pmd_t *pmd; 301 302 if (idx >= USER_PTRS_PER_PGD) { 303 pmd = (pmd_t *)__get_free_page(GFP_KERNEL); 304 305 if (pmd) 306 memcpy(pmd, 307 (void *)pgd_page_vaddr(swapper_pg_dir[idx]), 308 sizeof(pmd_t) * PTRS_PER_PMD); 309 } else 310 pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 311 312 return pmd; 313} 314 315static void pmd_cache_free(pmd_t *pmd, int idx) 316{ 317 if (idx >= USER_PTRS_PER_PGD) 318 free_page((unsigned long)pmd); 319 else 320 kmem_cache_free(pmd_cache, pmd); 321} 322 323pgd_t *pgd_alloc(struct mm_struct *mm) 324{ 325 int i; 326 pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); 327 328 if (PTRS_PER_PMD == 1 || !pgd) 329 return pgd; 330 331 for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { 332 pmd_t *pmd = pmd_cache_alloc(i); 333 334 if (!pmd) 335 goto out_oom; 336 337 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); 338 set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 339 } 340 return pgd; 341 342out_oom: 343 for (i--; i >= 0; i--) { 344 pgd_t pgdent = pgd[i]; 345 void* pmd = (void *)__va(pgd_val(pgdent)-1); 346 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); 347 pmd_cache_free(pmd, i); 348 } 349 quicklist_free(0, pgd_dtor, pgd); 350 return NULL; 351} 352 353void pgd_free(pgd_t *pgd) 354{ 355 int i; 356 357 /* in the PAE case user pgd entries are overwritten before usage */ 358 if (PTRS_PER_PMD > 1) 359 for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { 360 pgd_t pgdent = pgd[i]; 361 void* pmd = (void *)__va(pgd_val(pgdent)-1); 362 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); 363 pmd_cache_free(pmd, i); 364 } 365 /* in the non-PAE case, free_pgtables() clears user pgd entries */ 366 quicklist_free(0, pgd_dtor, pgd); 367} 368 369void check_pgt_cache(void) 370{ 371 quicklist_trim(0, pgd_dtor, 25, 16); 372} 373