1 2 3#include <linux/mm.h> 4#include <linux/interrupt.h> 5#include <linux/module.h> 6#include <asm/uaccess.h> 7 8extern int find_fixup_code(struct pt_regs *); 9extern void die_if_kernel(const char *, struct pt_regs *, long); 10extern int raw_printk(const char *fmt, ...); 11 12/* debug of low-level TLB reload */ 13#undef DEBUG 14 15#ifdef DEBUG 16#define D(x) x 17#else 18#define D(x) 19#endif 20 21/* debug of higher-level faults */ 22#define DPG(x) 23 24/* current active page directory */ 25 26volatile DEFINE_PER_CPU(pgd_t *,current_pgd); 27unsigned long cris_signal_return_page; 28 29/* 30 * This routine handles page faults. It determines the address, 31 * and the problem, and then passes it off to one of the appropriate 32 * routines. 33 * 34 * Notice that the address we're given is aligned to the page the fault 35 * occurred in, since we only get the PFN in R_MMU_CAUSE not the complete 36 * address. 37 * 38 * error_code: 39 * bit 0 == 0 means no page found, 1 means protection fault 40 * bit 1 == 0 means read, 1 means write 41 * 42 * If this routine detects a bad access, it returns 1, otherwise it 43 * returns 0. 44 */ 45 46asmlinkage void 47do_page_fault(unsigned long address, struct pt_regs *regs, 48 int protection, int writeaccess) 49{ 50 struct task_struct *tsk; 51 struct mm_struct *mm; 52 struct vm_area_struct * vma; 53 siginfo_t info; 54 55 D(printk("Page fault for %lX on %X at %lX, prot %d write %d\n", 56 address, smp_processor_id(), instruction_pointer(regs), 57 protection, writeaccess)); 58 59 tsk = current; 60 61 /* 62 * We fault-in kernel-space virtual memory on-demand. The 63 * 'reference' page table is init_mm.pgd. 64 * 65 * NOTE! We MUST NOT take any locks for this case. We may 66 * be in an interrupt or a critical region, and should 67 * only copy the information from the master page table, 68 * nothing more. 69 * 70 * NOTE2: This is done so that, when updating the vmalloc 71 * mappings we don't have to walk all processes pgdirs and 72 * add the high mappings all at once. Instead we do it as they 73 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL 74 * bit set so sometimes the TLB can use a lingering entry. 75 * 76 * This verifies that the fault happens in kernel space 77 * and that the fault was not a protection error (error_code & 1). 78 */ 79 80 if (address >= VMALLOC_START && 81 !protection && 82 !user_mode(regs)) 83 goto vmalloc_fault; 84 85 /* When stack execution is not allowed we store the signal 86 * trampolines in the reserved cris_signal_return_page. 87 * Handle this in the exact same way as vmalloc (we know 88 * that the mapping is there and is valid so no need to 89 * call handle_mm_fault). 90 */ 91 if (cris_signal_return_page && 92 address == cris_signal_return_page && 93 !protection && user_mode(regs)) 94 goto vmalloc_fault; 95 96 /* we can and should enable interrupts at this point */ 97 local_irq_enable(); 98 99 mm = tsk->mm; 100 info.si_code = SEGV_MAPERR; 101 102 /* 103 * If we're in an interrupt or have no user 104 * context, we must not take the fault.. 105 */ 106 107 if (in_atomic() || !mm) 108 goto no_context; 109 110 down_read(&mm->mmap_sem); 111 vma = find_vma(mm, address); 112 if (!vma) 113 goto bad_area; 114 if (vma->vm_start <= address) 115 goto good_area; 116 if (!(vma->vm_flags & VM_GROWSDOWN)) 117 goto bad_area; 118 if (user_mode(regs)) { 119 /* 120 * accessing the stack below usp is always a bug. 121 * we get page-aligned addresses so we can only check 122 * if we're within a page from usp, but that might be 123 * enough to catch brutal errors at least. 124 */ 125 if (address + PAGE_SIZE < rdusp()) 126 goto bad_area; 127 } 128 if (expand_stack(vma, address)) 129 goto bad_area; 130 131 /* 132 * Ok, we have a good vm_area for this memory access, so 133 * we can handle it.. 134 */ 135 136 good_area: 137 info.si_code = SEGV_ACCERR; 138 139 /* first do some preliminary protection checks */ 140 141 if (writeaccess == 2){ 142 if (!(vma->vm_flags & VM_EXEC)) 143 goto bad_area; 144 } else if (writeaccess == 1) { 145 if (!(vma->vm_flags & VM_WRITE)) 146 goto bad_area; 147 } else { 148 if (!(vma->vm_flags & (VM_READ | VM_EXEC))) 149 goto bad_area; 150 } 151 152 /* 153 * If for any reason at all we couldn't handle the fault, 154 * make sure we exit gracefully rather than endlessly redo 155 * the fault. 156 */ 157 158 switch (handle_mm_fault(mm, vma, address, writeaccess & 1)) { 159 case VM_FAULT_MINOR: 160 tsk->min_flt++; 161 break; 162 case VM_FAULT_MAJOR: 163 tsk->maj_flt++; 164 break; 165 case VM_FAULT_SIGBUS: 166 goto do_sigbus; 167 default: 168 goto out_of_memory; 169 } 170 171 up_read(&mm->mmap_sem); 172 return; 173 174 /* 175 * Something tried to access memory that isn't in our memory map.. 176 * Fix it, but check if it's kernel or user first.. 177 */ 178 179 bad_area: 180 up_read(&mm->mmap_sem); 181 182 bad_area_nosemaphore: 183 DPG(show_registers(regs)); 184 185 /* User mode accesses just cause a SIGSEGV */ 186 187 if (user_mode(regs)) { 188 info.si_signo = SIGSEGV; 189 info.si_errno = 0; 190 /* info.si_code has been set above */ 191 info.si_addr = (void *)address; 192 force_sig_info(SIGSEGV, &info, tsk); 193 return; 194 } 195 196 no_context: 197 198 /* Are we prepared to handle this kernel fault? 199 * 200 * (The kernel has valid exception-points in the source 201 * when it acesses user-memory. When it fails in one 202 * of those points, we find it in a table and do a jump 203 * to some fixup code that loads an appropriate error 204 * code) 205 */ 206 207 if (find_fixup_code(regs)) 208 return; 209 210 /* 211 * Oops. The kernel tried to access some bad page. We'll have to 212 * terminate things with extreme prejudice. 213 */ 214 215 if ((unsigned long) (address) < PAGE_SIZE) 216 raw_printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); 217 else 218 raw_printk(KERN_ALERT "Unable to handle kernel access"); 219 raw_printk(" at virtual address %08lx\n",address); 220 221 die_if_kernel("Oops", regs, (writeaccess << 1) | protection); 222 223 do_exit(SIGKILL); 224 225 /* 226 * We ran out of memory, or some other thing happened to us that made 227 * us unable to handle the page fault gracefully. 228 */ 229 230 out_of_memory: 231 up_read(&mm->mmap_sem); 232 printk("VM: killing process %s\n", tsk->comm); 233 if (user_mode(regs)) 234 do_exit(SIGKILL); 235 goto no_context; 236 237 do_sigbus: 238 up_read(&mm->mmap_sem); 239 240 /* 241 * Send a sigbus, regardless of whether we were in kernel 242 * or user mode. 243 */ 244 info.si_signo = SIGBUS; 245 info.si_errno = 0; 246 info.si_code = BUS_ADRERR; 247 info.si_addr = (void *)address; 248 force_sig_info(SIGBUS, &info, tsk); 249 250 /* Kernel mode? Handle exceptions or die */ 251 if (!user_mode(regs)) 252 goto no_context; 253 return; 254 255vmalloc_fault: 256 { 257 /* 258 * Synchronize this task's top level page-table 259 * with the 'reference' page table. 260 * 261 * Use current_pgd instead of tsk->active_mm->pgd 262 * since the latter might be unavailable if this 263 * code is executed in a misfortunately run irq 264 * (like inside schedule() between switch_mm and 265 * switch_to...). 266 */ 267 268 int offset = pgd_index(address); 269 pgd_t *pgd, *pgd_k; 270 pud_t *pud, *pud_k; 271 pmd_t *pmd, *pmd_k; 272 pte_t *pte_k; 273 274 pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset; 275 pgd_k = init_mm.pgd + offset; 276 277 /* Since we're two-level, we don't need to do both 278 * set_pgd and set_pmd (they do the same thing). If 279 * we go three-level at some point, do the right thing 280 * with pgd_present and set_pgd here. 281 * 282 * Also, since the vmalloc area is global, we don't 283 * need to copy individual PTE's, it is enough to 284 * copy the pgd pointer into the pte page of the 285 * root task. If that is there, we'll find our pte if 286 * it exists. 287 */ 288 289 pud = pud_offset(pgd, address); 290 pud_k = pud_offset(pgd_k, address); 291 if (!pud_present(*pud_k)) 292 goto no_context; 293 294 pmd = pmd_offset(pud, address); 295 pmd_k = pmd_offset(pud_k, address); 296 297 if (!pmd_present(*pmd_k)) 298 goto bad_area_nosemaphore; 299 300 set_pmd(pmd, *pmd_k); 301 302 /* Make sure the actual PTE exists as well to 303 * catch kernel vmalloc-area accesses to non-mapped 304 * addresses. If we don't do this, this will just 305 * silently loop forever. 306 */ 307 308 pte_k = pte_offset_kernel(pmd_k, address); 309 if (!pte_present(*pte_k)) 310 goto no_context; 311 312 return; 313 } 314} 315 316/* Find fixup code. */ 317int 318find_fixup_code(struct pt_regs *regs) 319{ 320 const struct exception_table_entry *fixup; 321 322 if ((fixup = search_exception_tables(instruction_pointer(regs))) != 0) { 323 /* Adjust the instruction pointer in the stackframe. */ 324 instruction_pointer(regs) = fixup->fixup; 325 arch_fixup(regs); 326 return 1; 327 } 328 329 return 0; 330} 331