/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>

static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);

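/*
 * Look up the kernel pte for a virtual address.  Returns NULL when a
 * level of the page table is not present.  Note that for a 2/4MB (PSE)
 * mapping the returned pointer is really the pmd entry, so callers have
 * to handle the large page case themselves.
 */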
pte_t *lookup_address(unsigned long address)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	return pte_offset_kernel(pmd, address);
}

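/*
 * Split one 2/4MB kernel mapping into 4k ptes.  A fresh page table page is
 * allocated (cpa_lock is dropped around the allocation because GFP_KERNEL
 * may sleep) and filled with small page entries covering the whole large
 * page; only the entry for the requested address gets the new protection,
 * all others get ref_prot.
 */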
static struct page *split_large_page(unsigned long address, pgprot_t prot,
					pgprot_t ref_prot)
{
	int i;
	unsigned long addr;
	struct page *base;
	pte_t *pbase;

	spin_unlock_irq(&cpa_lock);
	base = alloc_pages(GFP_KERNEL, 0);
	spin_lock_irq(&cpa_lock);
	if (!base)
		return NULL;

	/*
	 * page_private is used to track the number of entries in
	 * the page table page that have non standard attributes.
	 */
	SetPagePrivate(base);
	page_private(base) = 0;

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
	paravirt_alloc_pt(page_to_pfn(base));
	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
					   addr == address ? prot : ref_prot));
	}
	return base;
}

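/* Flush the cache lines of a single page, using the CPU's clflush stride. */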
static void cache_flush_page(struct page *p)
{
	unsigned long adr = (unsigned long)page_address(p);
	int i;

	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
		asm volatile("clflush (%0)" :: "r" (adr + i));
}

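/*
 * Per-CPU flush run via on_each_cpu() from flush_map(): write back and
 * invalidate the caches (the clflush path is disabled for now, see below)
 * and flush the TLB after attribute changes.
 */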
static void flush_kernel_map(void *arg)
{
	struct list_head *lh = (struct list_head *)arg;
	struct page *p;

	/* High level code is not ready for clflush yet */
	if (0 && cpu_has_clflush) {
		list_for_each_entry(p, lh, lru)
			cache_flush_page(p);
	} else if (boot_cpu_data.x86 >= 4)	/* wbinvd exists only on 486+ */
		wbinvd();

	__flush_tlb_all();
}

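/*
 * Install a new pte/pmd value in the kernel mapping.  init_mm is updated
 * directly; when the kernel pmd is not shared between page tables
 * (!SHARED_KERNEL_PMD) the change is also propagated to every pgd on
 * pgd_list, under pgd_lock.
 */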
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	struct page *page;
	unsigned long flags;

	set_pte_atomic(kpte, pte);	/* change init_mm */
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock_irqsave(&pgd_lock, flags);
	for (page = pgd_list; page; page = (struct page *)page->index) {
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;

		pgd = (pgd_t *)page_address(page) + pgd_index(address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		set_pte_atomic((pte_t *)pmd, pte);
	}
	spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * No more special protections in this 2/4MB area - revert to a
 * large page again.
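 *
 * The large pte is rebuilt with PAGE_KERNEL_LARGE_EXEC when the range
 * lies in the kernel text (below _etext) and PAGE_KERNEL_LARGE otherwise,
 * and installed through set_pmd_pte() so that every page table sees it.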
 */
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
	pgprot_t ref_prot;
	pte_t *linear;

	ref_prot = ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
		? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;

	linear = (pte_t *)
		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
	set_pmd_pte(linear, address,
		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
			    ref_prot));
}

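/*
 * Worker for change_page_attr(), called with cpa_lock held.  Changes the
 * protection of a single page in the kernel linear mapping, splitting a
 * large page first if necessary.  page_private() of the page table page
 * counts how many of its entries carry non-default protections; when the
 * count drops back to zero the pte page is queued on df_list for freeing
 * and the mapping is reverted to a large page.
 */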
static int
__change_page_attr(struct page *page, pgprot_t prot)
{
	pte_t *kpte;
	unsigned long address;
	struct page *kpte_page;

	BUG_ON(PageHighMem(page));
	address = (unsigned long)page_address(page);

	kpte = lookup_address(address);
	if (!kpte)
		return -EINVAL;
	kpte_page = virt_to_page(kpte);
	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
		if (!pte_huge(*kpte)) {
			set_pte_atomic(kpte, mk_pte(page, prot));
		} else {
			pgprot_t ref_prot;
			struct page *split;

			ref_prot =
			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
				? PAGE_KERNEL_EXEC : PAGE_KERNEL;
			split = split_large_page(address, prot, ref_prot);
			if (!split)
				return -ENOMEM;
			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
			kpte_page = split;
		}
		page_private(kpte_page)++;
	} else if (!pte_huge(*kpte)) {
		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
		BUG_ON(page_private(kpte_page) == 0);
		page_private(kpte_page)--;
	} else
		BUG();

	/*
	 * If the pte was reserved, it means it was created at boot
	 * time (not via split_large_page) and in turn we must not
	 * replace it with a largepage.
	 */
	if (!PageReserved(kpte_page)) {
		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
			ClearPagePrivate(kpte_page);
			paravirt_release_pt(page_to_pfn(kpte_page));
			list_add(&kpte_page->lru, &df_list);
			revert_page(kpte_page, address);
		}
	}
	return 0;
}

static inline void flush_map(struct list_head *l)
{
	on_each_cpu(flush_kernel_map, l, 1, 1);
}

/*
 * Change the page attributes of a page in the kernel linear mapping.
 *
 * This should be used when a page is mapped somewhere with a caching policy
 * other than write-back - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the attributes of the page
 * in the kernel linear mapping as well.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * Caller must call global_flush_tlb() after this.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	int err = 0;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&cpa_lock, flags);
	for (i = 0; i < numpages; i++, page++) {
		err = __change_page_attr(page, prot);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&cpa_lock, flags);
	return err;
}
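/*
 * Typical use (illustrative sketch only, not part of the original file):
 * remap a buffer in the linear mapping as uncached, then flush:
 *
 *	if (!change_page_attr(virt_to_page(buf), nr_pages, PAGE_KERNEL_NOCACHE))
 *		global_flush_tlb();
 *
 * "buf" and "nr_pages" are hypothetical names; PAGE_KERNEL_NOCACHE is the
 * standard uncached kernel protection on i386.
 */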
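/*
 * Flush caches and TLBs on all CPUs after change_page_attr() and free any
 * page table pages that became unused when mappings were reverted to
 * large pages.  Must be called with interrupts enabled.
 */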
void global_flush_tlb(void)
{
	struct list_head l;
	struct page *pg, *next;

	BUG_ON(irqs_disabled());

	spin_lock_irq(&cpa_lock);
	list_replace_init(&df_list, &l);
	spin_unlock_irq(&cpa_lock);
	flush_map(&l);
	list_for_each_entry_safe(pg, next, &l, lru)
		__free_page(pg);
}

#ifdef CONFIG_DEBUG_PAGEALLOC
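/*
 * For CONFIG_DEBUG_PAGEALLOC: map or unmap pages in the kernel linear
 * mapping so that accesses to freed pages fault.  Mapping with
 * __pgprot(0) clears the present bit.
 */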
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * The return value is ignored - the calls cannot fail, since
	 * large pages are disabled at boot time.
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should send an IPI and flush all TLBs, but that can
	 * deadlock here - so flush only the current CPU.
	 */
	__flush_tlb_all();
}
#endif

EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);