/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

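/*
 * Fallback for architectures that do not define pgprot_modify(): simply
 * take the new protection as-is.  An architecture overrides this when it
 * needs to carry bits over from the old protection (e.g. cache attribute
 * bits) while applying the new access permissions.
 */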
#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	return newprot;
}
#endif

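/*
 * change_pte_range - apply the new protection to every PTE covered by one
 * pmd entry, with the page table lock held.
 *
 * Present PTEs are rewritten via ptep_modify_prot_start()/commit(); when
 * dirty accounting is in effect, PTEs that are already dirty are re-marked
 * writable so no further write fault is needed just to track dirtying.
 * Write migration entries are downgraded to read-only migration entries.
 */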
static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	pte_t *pte, oldpte;
	spinlock_t *ptl;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;

			ptent = ptep_modify_prot_start(mm, addr, pte);
			ptent = pte_modify(ptent, newprot);

			/*
			 * Avoid taking write faults for pages we know to be
			 * dirty.
			 */
			if (dirty_accountable && pte_dirty(ptent))
				ptent = pte_mkwrite(ptent);

			ptep_modify_prot_commit(mm, addr, pte, ptent);
		} else if (PAGE_MIGRATION && !pte_file(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				set_pte_at(mm, addr, pte,
					swp_entry_to_pte(entry));
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
}

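/*
 * change_pmd_range - walk the pmd entries covering one pud entry and hand
 * each populated range down to change_pte_range().  Empty or bad pmds are
 * skipped (and cleared) by pmd_none_or_clear_bad().
 */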
static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
	} while (pmd++, addr = next, addr != end);
}

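/*
 * change_pud_range - walk the pud entries covering one pgd entry and hand
 * each populated range down to change_pmd_range().
 */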
static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable);
	} while (pud++, addr = next, addr != end);
}

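/*
 * change_protection - walk the whole [addr, end) range of a vma from the
 * pgd down and rewrite the protection bits of every mapped PTE.  Caches
 * are flushed for the range before the walk and the TLB afterwards.
 */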
static void change_protection(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable);
	} while (pgd++, addr = next, addr != end);
	flush_tlb_range(vma, start, end);
}

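/*
 * mprotect_fixup - apply newflags to the range [start, end) of one vma.
 *
 * Charges the range against the commit limit if a private mapping becomes
 * writable, tries to merge with the neighbouring vmas, otherwise splits off
 * the affected part, then updates vm_flags/vm_page_prot and rewrites the
 * protections of any pages already mapped.  Called with mmap_sem held for
 * writing.
 */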
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again.  hugetlb mappings were accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory(charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (*pprev) {
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
					  vm_get_page_prot(newflags));

	if (vma_wants_writenotify(vma)) {
		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
		dirty_accountable = 1;
	}

	mmu_notifier_invalidate_range_start(mm, start, end);
	if (is_vm_hugetlb_page(vma))
		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
	else
		change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
	mmu_notifier_invalidate_range_end(mm, start, end);
	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}

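/*
 * The mprotect(2) system call: change the protection of the pages in
 * [start, start + len) to @prot; e.g. mprotect(addr, len, PROT_READ)
 * makes the range read-only.  The range may span several vmas; each one
 * is fixed up in turn by mprotect_fixup(), and a missing vma or a hole
 * in the range makes the call fail with -ENOMEM.
 */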
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long vm_flags, nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);

	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	reqprot = prot;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		prot |= PROT_EXEC;

	vm_flags = calc_vm_prot_bits(prot);

	down_write(&current->mm->mmap_sem);

	vma = find_vma_prev(current->mm, start, &prev);
	error = -ENOMEM;
	if (!vma)
		goto out;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shifts VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			goto out;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}