/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>

static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);

pte_t *lookup_address(unsigned long address)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;
	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	return pte_offset_kernel(pmd, address);
}

static struct page *split_large_page(unsigned long address, pgprot_t prot,
				     pgprot_t ref_prot)
{
	int i;
	unsigned long addr;
	struct page *base;
	pte_t *pbase;

	spin_unlock_irq(&cpa_lock);
	base = alloc_pages(GFP_KERNEL, 0);
	spin_lock_irq(&cpa_lock);
	if (!base)
		return NULL;

	/*
	 * page_private is used to track the number of entries in
	 * the page table page that have non-standard attributes.
	 */
	SetPagePrivate(base);
	page_private(base) = 0;

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
	paravirt_alloc_pt(page_to_pfn(base));
	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
					   addr == address ? prot : ref_prot));
	}
	return base;
}

static void cache_flush_page(struct page *p)
{
	unsigned long adr = (unsigned long)page_address(p);
	int i;
	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
		asm volatile("clflush (%0)" :: "r" (adr + i));
}

static void flush_kernel_map(void *arg)
{
	struct list_head *lh = (struct list_head *)arg;
	struct page *p;

	/* High level code is not ready for clflush yet */
	if (0 && cpu_has_clflush) {
		list_for_each_entry (p, lh, lru)
			cache_flush_page(p);
	} else if (boot_cpu_data.x86_model >= 4)
		wbinvd();

	__flush_tlb_all();
}

static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	struct page *page;
	unsigned long flags;

	set_pte_atomic(kpte, pte);	/* change init_mm */
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock_irqsave(&pgd_lock, flags);
	for (page = pgd_list; page; page = (struct page *)page->index) {
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pgd = (pgd_t *)page_address(page) + pgd_index(address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		set_pte_atomic((pte_t *)pmd, pte);
	}
	spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * No more special protections in this 2/4MB area - revert to a
 * large page again.
 */
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
	pgprot_t ref_prot;
	pte_t *linear;

	ref_prot =
		((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
			? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;

	linear = (pte_t *)
		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
	set_pmd_pte(linear, address,
		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
			    ref_prot));
}

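/*
 * Apply the requested protection to a single page of the kernel linear
 * map.  A covering large page is split on demand; page_private() of the
 * PTE page counts how many of its entries still carry a non-standard
 * protection.  Once that count drops back to zero, the 2/4MB region is
 * reverted to a large page and the now-unused PTE page is queued on
 * df_list, to be freed by global_flush_tlb() after the flush.
 * Called with cpa_lock held.
 */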
static int
__change_page_attr(struct page *page, pgprot_t prot)
{
	pte_t *kpte;
	unsigned long address;
	struct page *kpte_page;

	BUG_ON(PageHighMem(page));
	address = (unsigned long)page_address(page);

	kpte = lookup_address(address);
	if (!kpte)
		return -EINVAL;
	kpte_page = virt_to_page(kpte);
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
		if (!pte_huge(*kpte)) {
			set_pte_atomic(kpte, mk_pte(page, prot));
		} else {
			pgprot_t ref_prot;
			struct page *split;

			ref_prot =
				((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
					? PAGE_KERNEL_EXEC : PAGE_KERNEL;
			split = split_large_page(address, prot, ref_prot);
			if (!split)
				return -ENOMEM;
			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
			kpte_page = split;
		}
		page_private(kpte_page)++;
	} else if (!pte_huge(*kpte)) {
		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
		BUG_ON(page_private(kpte_page) == 0);
		page_private(kpte_page)--;
	} else
		BUG();

	/*
	 * If the pte was reserved, it means it was created at boot
	 * time (not via split_large_page) and in turn we must not
	 * replace it with a large page.
	 */
	if (!PageReserved(kpte_page)) {
		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
			ClearPagePrivate(kpte_page);
			paravirt_release_pt(page_to_pfn(kpte_page));
			list_add(&kpte_page->lru, &df_list);
			revert_page(kpte_page, address);
		}
	}
	return 0;
}

static inline void flush_map(struct list_head *l)
{
	on_each_cpu(flush_kernel_map, l, 1, 1);
}

/*
 * Change the page attributes of a page in the kernel linear mapping.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * Caller must call global_flush_tlb() after this.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	int err = 0;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&cpa_lock, flags);
	for (i = 0; i < numpages; i++, page++) {
		err = __change_page_attr(page, prot);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&cpa_lock, flags);
	return err;
}

void global_flush_tlb(void)
{
	struct list_head l;
	struct page *pg, *next;

	BUG_ON(irqs_disabled());

	spin_lock_irq(&cpa_lock);
	list_replace_init(&df_list, &l);
	spin_unlock_irq(&cpa_lock);
	flush_map(&l);
	list_for_each_entry_safe(pg, next, &l, lru) {
		__free_page(pg);
	}
}

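/*
 * Typical caller pattern (an illustrative sketch only; "buf" stands for
 * a hypothetical page-aligned kernel buffer that is not part of this
 * file):
 *
 *	struct page *pg = virt_to_page(buf);
 *
 *	if (change_page_attr(pg, 1, PAGE_KERNEL_NOCACHE) == 0)
 *		global_flush_tlb();
 *	...
 *	change_page_attr(pg, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 */
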
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * The return value is ignored - the calls cannot fail, since
	 * large pages are disabled at boot time.
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should send an IPI and flush all TLBs, but that can deadlock
	 * here, so flush only the current CPU's TLB.
	 */
	__flush_tlb_all();
}
#endif

EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);