#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <asm/uaccess.h>

extern int find_fixup_code(struct pt_regs *);
extern void die_if_kernel(const char *, struct pt_regs *, long);
extern int raw_printk(const char *fmt, ...);

/* debug of low-level TLB reload */
#undef DEBUG

#ifdef DEBUG
#define D(x) x
#else
#define D(x)
#endif

/* debug of higher-level faults */
#define DPG(x)

/* current active page directory */

volatile DEFINE_PER_CPU(pgd_t *, current_pgd);
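/* Page that holds the signal-return trampolines when stack execution
 * is not allowed; see the handling in do_page_fault() below. */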
unsigned long cris_signal_return_page;

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 *
 * Notice that the address we're given is aligned to the page the fault
 * occurred in, since we only get the PFN in R_MMU_CAUSE, not the complete
 * address.
 *
 * The fault is described by two arguments:
 *	protection  == 0 means no page found, 1 means protection fault
 *	writeaccess == 0 means read, 1 means write, 2 means execute
 *
 * A bad access in user mode results in a signal; a bad access in kernel
 * mode is fixed up via the exception tables or ends in an oops.
 */

asmlinkage void
do_page_fault(unsigned long address, struct pt_regs *regs,
	      int protection, int writeaccess)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	siginfo_t info;

	D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n",
		 address, smp_processor_id(), instruction_pointer(regs),
		 protection, writeaccess));

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * NOTE2: This is done so that, when updating the vmalloc
	 * mappings, we don't have to walk all processes' pgdirs and
	 * add the high mappings all at once. Instead we do it as they
	 * are used. However, vmalloc'ed page entries have the PAGE_GLOBAL
	 * bit set, so sometimes the TLB can use a lingering entry.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection error (protection == 0).
	 */

	if (address >= VMALLOC_START &&
	    !protection &&
	    !user_mode(regs))
		goto vmalloc_fault;

	/* When stack execution is not allowed we store the signal
	 * trampolines in the reserved cris_signal_return_page.
	 * Handle this in the exact same way as vmalloc (we know
	 * that the mapping is there and is valid so no need to
	 * call handle_mm_fault).
	 */
	if (cris_signal_return_page &&
	    address == cris_signal_return_page &&
	    !protection && user_mode(regs))
		goto vmalloc_fault;

	/* we can and should enable interrupts at this point */
	local_irq_enable();

	mm = tsk->mm;
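	/* Assume the address is simply not mapped; this is refined to
	 * SEGV_ACCERR below if a mapping exists but forbids the access. */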
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */

	if (in_atomic() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (user_mode(regs)) {
		/*
		 * Accessing the stack below usp is always a bug.
		 * We get page-aligned addresses so we can only check
		 * if we're within a page from usp, but that might be
		 * enough to catch brutal errors at least.
		 */
		if (address + PAGE_SIZE < rdusp())
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

 good_area:
	info.si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */

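	/* writeaccess encodes the access type:
	 * 2 = instruction fetch, 1 = write, 0 = read. */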
	if (writeaccess == 2) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (writeaccess == 1) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

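	/* Only the write bit is passed on; an execute fault
	 * (writeaccess == 2) is handled as a read by handle_mm_fault. */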
	switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) {
	case VM_FAULT_MINOR:
		tsk->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		tsk->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto do_sigbus;
	default:
		goto out_of_memory;
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */

 bad_area:
	up_read(&mm->mmap_sem);

 bad_area_nosemaphore:
	DPG(show_registers(regs));

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

 no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception points in the source
	 *  when it accesses user memory. When it faults at one
	 *  of those points, we find it in a table and jump
	 *  to some fixup code that loads an appropriate error
	 *  code.)
	 */

	if (find_fixup_code(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

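	/* A fault in the first page is almost certainly a stray NULL
	 * pointer dereference rather than a genuine mapping problem. */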
	if ((unsigned long) (address) < PAGE_SIZE)
		raw_printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		raw_printk(KERN_ALERT "Unable to handle kernel access");
	raw_printk(" at virtual address %08lx\n", address);

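	/* Pack the fault type into a single error code for die_if_kernel:
	 * bit 0 = protection fault, bit 1 = write access. */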
	die_if_kernel("Oops", regs, (writeaccess << 1) | protection);

	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

 out_of_memory:
	up_read(&mm->mmap_sem);
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;

 do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed from an irq that fires at an
		 * unfortunate moment (e.g. inside schedule(),
		 * between switch_mm and switch_to...).
		 */

		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

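		/* pgd points into this CPU's active page directory,
		 * pgd_k into the kernel's reference table (init_mm). */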
		pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset;
		pgd_k = init_mm.pgd + offset;

		/* Since we're two-level, we don't need to do both
		 * set_pgd and set_pmd (they do the same thing). If
		 * we go three-level at some point, do the right thing
		 * with pgd_present and set_pgd here.
		 *
		 * Also, since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);

		if (!pmd_present(*pmd_k))
			goto bad_area_nosemaphore;

		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}

/* Find fixup code. */
int
find_fixup_code(struct pt_regs *regs)
{
	const struct exception_table_entry *fixup;

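	/* Look up the faulting instruction in the exception table; if an
	 * entry exists, redirect execution to its fixup handler. */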
	if ((fixup = search_exception_tables(instruction_pointer(regs))) != 0) {
		/* Adjust the instruction pointer in the stackframe. */
		instruction_pointer(regs) = fixup->fixup;
		arch_fixup(regs);
		return 1;
	}

	return 0;
}