1/* 2 * linux/arch/i386/mm/fault.c 3 * 4 * Copyright (C) 1995 Linus Torvalds 5 */ 6 7#include <linux/signal.h> 8#include <linux/sched.h> 9#include <linux/kernel.h> 10#include <linux/errno.h> 11#include <linux/string.h> 12#include <linux/types.h> 13#include <linux/ptrace.h> 14#include <linux/mman.h> 15#include <linux/mm.h> 16#include <linux/smp.h> 17#include <linux/smp_lock.h> 18#include <linux/interrupt.h> 19#include <linux/init.h> 20#include <linux/tty.h> 21#include <linux/vt_kern.h> /* For unblank_screen() */ 22 23#include <asm/system.h> 24#include <asm/uaccess.h> 25#include <asm/pgalloc.h> 26#include <asm/hardirq.h> 27 28extern void die(const char *,struct pt_regs *,long); 29 30/* 31 * Ugly, ugly, but the goto's result in better assembly.. 32 */ 33int __verify_write(const void * addr, unsigned long size) 34{ 35 struct vm_area_struct * vma; 36 unsigned long start = (unsigned long) addr; 37 38 if (!size) 39 return 1; 40 41 vma = find_vma(current->mm, start); 42 if (!vma) 43 goto bad_area; 44 if (vma->vm_start > start) 45 goto check_stack; 46 47good_area: 48 if (!(vma->vm_flags & VM_WRITE)) 49 goto bad_area; 50 size--; 51 size += start & ~PAGE_MASK; 52 size >>= PAGE_SHIFT; 53 start &= PAGE_MASK; 54 55 for (;;) { 56 survive: 57 { 58 int fault = handle_mm_fault(current->mm, vma, start, 1); 59 if (!fault) 60 goto bad_area; 61 if (fault < 0) 62 goto out_of_memory; 63 } 64 if (!size) 65 break; 66 size--; 67 start += PAGE_SIZE; 68 if (start < vma->vm_end) 69 continue; 70 vma = vma->vm_next; 71 if (!vma || vma->vm_start != start) 72 goto bad_area; 73 if (!(vma->vm_flags & VM_WRITE)) 74 goto bad_area;; 75 } 76 return 1; 77 78check_stack: 79 if (!(vma->vm_flags & VM_GROWSDOWN)) 80 goto bad_area; 81 if (expand_stack(vma, start) == 0) 82 goto good_area; 83 84bad_area: 85 return 0; 86 87out_of_memory: 88 if (current->pid == 1) { 89 yield(); 90 goto survive; 91 } 92 goto bad_area; 93} 94 95extern spinlock_t timerlist_lock; 96 97/* 98 * Unlock any spinlocks which will prevent us from getting the 99 * message out (timerlist_lock is acquired through the 100 * console unblank code) 101 */ 102void bust_spinlocks(int yes) 103{ 104 spin_lock_init(&timerlist_lock); 105 if (yes) { 106 oops_in_progress = 1; 107#ifdef CONFIG_SMP 108 global_irq_lock = 0; /* Many serial drivers do __global_cli() */ 109#endif 110 } else { 111 int loglevel_save = console_loglevel; 112#ifdef CONFIG_VT 113 unblank_screen(); 114#endif 115 oops_in_progress = 0; 116 /* 117 * OK, the message is on the console. Now we call printk() 118 * without oops_in_progress set so that printk will give klogd 119 * a poke. Hold onto your hats... 120 */ 121 console_loglevel = 15; /* NMI oopser may have shut the console up */ 122 printk(" "); 123 console_loglevel = loglevel_save; 124 } 125} 126 127asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); 128extern unsigned long idt; 129 130/* 131 * This routine handles page faults. It determines the address, 132 * and the problem, and then passes it off to one of the appropriate 133 * routines. 134 * 135 * error_code: 136 * bit 0 == 0 means no page found, 1 means protection fault 137 * bit 1 == 0 means read, 1 means write 138 * bit 2 == 0 means kernel, 1 means user-mode 139 */ 140asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) 141{ 142 struct task_struct *tsk; 143 struct mm_struct *mm; 144 struct vm_area_struct * vma; 145 unsigned long address; 146 unsigned long page; 147 unsigned long fixup; 148 int write; 149 siginfo_t info; 150 151 /* get the address */ 152 __asm__("movl %%cr2,%0":"=r" (address)); 153 154 /* It's safe to allow irq's after cr2 has been saved */ 155 if (regs->eflags & X86_EFLAGS_IF) 156 local_irq_enable(); 157 158 tsk = current; 159 160 /* 161 * We fault-in kernel-space virtual memory on-demand. The 162 * 'reference' page table is init_mm.pgd. 163 * 164 * NOTE! We MUST NOT take any locks for this case. We may 165 * be in an interrupt or a critical region, and should 166 * only copy the information from the master page table, 167 * nothing more. 168 * 169 * This verifies that the fault happens in kernel space 170 * (error_code & 4) == 0, and that the fault was not a 171 * protection error (error_code & 1) == 0. 172 */ 173 if (address >= TASK_SIZE && !(error_code & 5)) 174 goto vmalloc_fault; 175 176 mm = tsk->mm; 177 info.si_code = SEGV_MAPERR; 178 179 /* 180 * If we're in an interrupt or have no user 181 * context, we must not take the fault.. 182 */ 183 if (in_interrupt() || !mm) 184 goto no_context; 185 186 down_read(&mm->mmap_sem); 187 188 vma = find_vma(mm, address); 189 if (!vma) 190 goto bad_area; 191 if (vma->vm_start <= address) 192 goto good_area; 193 if (!(vma->vm_flags & VM_GROWSDOWN)) 194 goto bad_area; 195 if (error_code & 4) { 196 /* 197 * accessing the stack below %esp is always a bug. 198 * The "+ 32" is there due to some instructions (like 199 * pusha) doing post-decrement on the stack and that 200 * doesn't show up until later.. 201 */ 202 if (address + 32 < regs->esp) 203 goto bad_area; 204 } 205 if (expand_stack(vma, address)) 206 goto bad_area; 207/* 208 * Ok, we have a good vm_area for this memory access, so 209 * we can handle it.. 210 */ 211good_area: 212 info.si_code = SEGV_ACCERR; 213 write = 0; 214 switch (error_code & 3) { 215 default: /* 3: write, present */ 216#ifdef TEST_VERIFY_AREA 217 if (regs->cs == KERNEL_CS) 218 printk("WP fault at %08lx\n", regs->eip); 219#endif 220 /* fall through */ 221 case 2: /* write, not present */ 222 if (!(vma->vm_flags & VM_WRITE)) 223 goto bad_area; 224 write++; 225 break; 226 case 1: /* read, present */ 227 goto bad_area; 228 case 0: /* read, not present */ 229 if (!(vma->vm_flags & (VM_READ | VM_EXEC))) 230 goto bad_area; 231 } 232 233 survive: 234 /* 235 * If for any reason at all we couldn't handle the fault, 236 * make sure we exit gracefully rather than endlessly redo 237 * the fault. 238 */ 239 switch (handle_mm_fault(mm, vma, address, write)) { 240 case 1: 241 tsk->min_flt++; 242 break; 243 case 2: 244 tsk->maj_flt++; 245 break; 246 case 0: 247 goto do_sigbus; 248 default: 249 goto out_of_memory; 250 } 251 252 /* 253 * Did it hit the DOS screen memory VA from vm86 mode? 254 */ 255 if (regs->eflags & VM_MASK) { 256 unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; 257 if (bit < 32) 258 tsk->thread.screen_bitmap |= 1 << bit; 259 } 260 up_read(&mm->mmap_sem); 261 return; 262 263/* 264 * Something tried to access memory that isn't in our memory map.. 265 * Fix it, but check if it's kernel or user first.. 266 */ 267bad_area: 268 up_read(&mm->mmap_sem); 269 270 /* User mode accesses just cause a SIGSEGV */ 271 if (error_code & 4) { 272 tsk->thread.cr2 = address; 273 tsk->thread.error_code = error_code; 274 tsk->thread.trap_no = 14; 275 info.si_signo = SIGSEGV; 276 info.si_errno = 0; 277 /* info.si_code has been set above */ 278 info.si_addr = (void *)address; 279 force_sig_info(SIGSEGV, &info, tsk); 280 return; 281 } 282 283 if (boot_cpu_data.f00f_bug) { 284 unsigned long nr; 285 286 nr = (address - idt) >> 3; 287 288 if (nr == 6) { 289 do_invalid_op(regs, 0); 290 return; 291 } 292 } 293 294no_context: 295 /* Are we prepared to handle this kernel fault? */ 296 if ((fixup = search_exception_table(regs->eip)) != 0) { 297 regs->eip = fixup; 298 return; 299 } 300 301/* 302 * Oops. The kernel tried to access some bad page. We'll have to 303 * terminate things with extreme prejudice. 304 */ 305 306 bust_spinlocks(1); 307 308 if (address < PAGE_SIZE) 309 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); 310 else 311 printk(KERN_ALERT "Unable to handle kernel paging request"); 312 printk(" at virtual address %08lx\n",address); 313 printk(" printing eip:\n"); 314 printk("%08lx\n", regs->eip); 315 asm("movl %%cr3,%0":"=r" (page)); 316 page = ((unsigned long *) __va(page))[address >> 22]; 317 printk(KERN_ALERT "*pde = %08lx\n", page); 318 if (page & 1) { 319 page &= PAGE_MASK; 320 address &= 0x003ff000; 321 page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; 322 printk(KERN_ALERT "*pte = %08lx\n", page); 323 } 324 die("Oops", regs, error_code); 325 bust_spinlocks(0); 326 do_exit(SIGKILL); 327 328/* 329 * We ran out of memory, or some other thing happened to us that made 330 * us unable to handle the page fault gracefully. 331 */ 332out_of_memory: 333 if (tsk->pid == 1) { 334 yield(); 335 goto survive; 336 } 337 up_read(&mm->mmap_sem); 338 printk("VM: killing process %s\n", tsk->comm); 339 if (error_code & 4) 340 do_exit(SIGKILL); 341 goto no_context; 342 343do_sigbus: 344 up_read(&mm->mmap_sem); 345 346 /* 347 * Send a sigbus, regardless of whether we were in kernel 348 * or user mode. 349 */ 350 tsk->thread.cr2 = address; 351 tsk->thread.error_code = error_code; 352 tsk->thread.trap_no = 14; 353 info.si_signo = SIGBUS; 354 info.si_errno = 0; 355 info.si_code = BUS_ADRERR; 356 info.si_addr = (void *)address; 357 force_sig_info(SIGBUS, &info, tsk); 358 359 /* Kernel mode? Handle exceptions or die */ 360 if (!(error_code & 4)) 361 goto no_context; 362 return; 363 364vmalloc_fault: 365 { 366 /* 367 * Synchronize this task's top level page-table 368 * with the 'reference' page table. 369 * 370 * Do _not_ use "tsk" here. We might be inside 371 * an interrupt in the middle of a task switch.. 372 */ 373 int offset = __pgd_offset(address); 374 pgd_t *pgd, *pgd_k; 375 pmd_t *pmd, *pmd_k; 376 pte_t *pte_k; 377 378 asm("movl %%cr3,%0":"=r" (pgd)); 379 pgd = offset + (pgd_t *)__va(pgd); 380 pgd_k = init_mm.pgd + offset; 381 382 if (!pgd_present(*pgd_k)) 383 goto no_context; 384 set_pgd(pgd, *pgd_k); 385 386 pmd = pmd_offset(pgd, address); 387 pmd_k = pmd_offset(pgd_k, address); 388 if (!pmd_present(*pmd_k)) 389 goto no_context; 390 set_pmd(pmd, *pmd_k); 391 392 pte_k = pte_offset(pmd_k, address); 393 if (!pte_present(*pte_k)) 394 goto no_context; 395 return; 396 } 397} 398