1/* 2 * linux/arch/x86-64/mm/fault.c 3 * 4 * Copyright (C) 1995 Linus Torvalds 5 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. 6 */ 7 8#include <linux/signal.h> 9#include <linux/sched.h> 10#include <linux/kernel.h> 11#include <linux/errno.h> 12#include <linux/string.h> 13#include <linux/types.h> 14#include <linux/ptrace.h> 15#include <linux/mman.h> 16#include <linux/mm.h> 17#include <linux/smp.h> 18#include <linux/smp_lock.h> 19#include <linux/interrupt.h> 20#include <linux/init.h> 21#include <linux/tty.h> 22#include <linux/vt_kern.h> /* For unblank_screen() */ 23#include <linux/compiler.h> 24 25#include <asm/system.h> 26#include <asm/uaccess.h> 27#include <asm/pgalloc.h> 28#include <asm/hardirq.h> 29#include <asm/smp.h> 30#include <asm/proto.h> 31#include <asm/kdebug.h> 32 33spinlock_t pcrash_lock; 34int crashing_cpu; 35 36extern spinlock_t console_lock, timerlist_lock; 37 38void bust_spinlocks(int yes) 39{ 40 spin_lock_init(&timerlist_lock); 41 if (yes) { 42 oops_in_progress = 1; 43#ifdef CONFIG_SMP 44 global_irq_lock = 0; /* Many serial drivers do __global_cli() */ 45#endif 46 } else { 47 int loglevel_save = console_loglevel; 48#ifdef CONFIG_VT 49 unblank_screen(); 50#endif 51 oops_in_progress = 0; 52 /* 53 * OK, the message is on the console. Now we call printk() 54 * without oops_in_progress set so that printk will give klogd 55 * a poke. Hold onto your hats... 56 */ 57 console_loglevel = 15; /* NMI oopser may have shut the console up */ 58 printk(" "); 59 console_loglevel = loglevel_save; 60 } 61} 62 63void dump_pagetable(unsigned long address) 64{ 65 static char *name[] = { "PML4", "PGD", "PDE", "PTE" }; 66 int i, shift; 67 unsigned long page; 68 69 shift = 9+9+9+12; 70 address &= ~0xFFFF000000000000UL; 71 asm("movq %%cr3,%0" : "=r" (page)); 72 for (i = 0; i < 4; i++) { 73 unsigned long *padr = (unsigned long *) __va(page); 74 padr += (address >> shift) & 0x1FFU; 75 if (__get_user(page, padr)) { 76 printk("%s: bad %p\n", name[i], padr); 77 break; 78 } 79 printk("%s: %016lx ", name[i], page); 80 if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */ 81 break; 82 page &= ~0xFFFUL; 83 shift -= (i == 0) ? 12 : 9; 84 } 85 printk("\n"); 86} 87 88int page_fault_trace; 89int exception_trace = 1; 90 91/* 92 * This routine handles page faults. It determines the address, 93 * and the problem, and then passes it off to one of the appropriate 94 * routines. 95 * 96 * error_code: 97 * bit 0 == 0 means no page found, 1 means protection fault 98 * bit 1 == 0 means read, 1 means write 99 * bit 2 == 0 means kernel, 1 means user-mode 100 * bit 3 == 1 means fault was an instruction fetch 101 */ 102asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) 103{ 104 struct task_struct *tsk; 105 struct mm_struct *mm; 106 struct vm_area_struct * vma; 107 unsigned long address; 108 unsigned long fixup; 109 int write; 110 siginfo_t info; 111 112 /* get the address */ 113 __asm__("movq %%cr2,%0":"=r" (address)); 114 115#ifdef CONFIG_CHECKING 116 if (page_fault_trace) 117 printk("pfault %d rip:%lx rsp:%lx cs:%lu ss:%lu addr %lx error %lx\n", 118 stack_smp_processor_id(), regs->rip,regs->rsp,regs->cs, 119 regs->ss,address,error_code); 120 121 { 122 unsigned long gs; 123 struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); 124 rdmsrl(MSR_GS_BASE, gs); 125 if (gs != (unsigned long)pda) { 126 wrmsrl(MSR_GS_BASE, pda); 127 printk("page_fault: wrong gs %lx expected %p\n", gs, pda); 128 } 129 } 130#endif 131 132 tsk = current; 133 mm = tsk->mm; 134 info.si_code = SEGV_MAPERR; 135 136 /* 5 => page not present and from supervisor mode */ 137 if (unlikely(!(error_code & 5) && 138 ((address >= VMALLOC_START && address <= VMALLOC_END) || 139 (address >= MODULES_VADDR && address <= MODULES_END)))) 140 goto vmalloc_fault; 141 142 /* 143 * If we're in an interrupt or have no user 144 * context, we must not take the fault.. 145 */ 146 if (in_interrupt() || !mm) 147 goto no_context; 148 149again: 150 down_read(&mm->mmap_sem); 151 152 vma = find_vma(mm, address); 153 if (!vma) 154 goto bad_area; 155 if (vma->vm_start <= address) 156 goto good_area; 157 if (!(vma->vm_flags & VM_GROWSDOWN)) 158 goto bad_area; 159 if (error_code & 4) { 160 // XXX: align red zone size with ABI 161 if (address + 128 < regs->rsp) 162 goto bad_area; 163 } 164 if (expand_stack(vma, address)) 165 goto bad_area; 166/* 167 * Ok, we have a good vm_area for this memory access, so 168 * we can handle it.. 169 */ 170good_area: 171 info.si_code = SEGV_ACCERR; 172 write = 0; 173 switch (error_code & 3) { 174 default: /* 3: write, present */ 175 /* fall through */ 176 case 2: /* write, not present */ 177 if (!(vma->vm_flags & VM_WRITE)) 178 goto bad_area; 179 write++; 180 break; 181 case 1: /* read, present */ 182 goto bad_area; 183 case 0: /* read, not present */ 184 if (!(vma->vm_flags & (VM_READ | VM_EXEC))) 185 goto bad_area; 186 } 187 188 /* 189 * If for any reason at all we couldn't handle the fault, 190 * make sure we exit gracefully rather than endlessly redo 191 * the fault. 192 */ 193 switch (handle_mm_fault(mm, vma, address, write)) { 194 case 1: 195 tsk->min_flt++; 196 break; 197 case 2: 198 tsk->maj_flt++; 199 break; 200 case 0: 201 goto do_sigbus; 202 default: 203 goto out_of_memory; 204 } 205 206 up_read(&mm->mmap_sem); 207 return; 208 209/* 210 * Something tried to access memory that isn't in our memory map.. 211 * Fix it, but check if it's kernel or user first.. 212 */ 213bad_area: 214 up_read(&mm->mmap_sem); 215 216bad_area_nosemaphore: 217 218 /* User mode accesses just cause a SIGSEGV */ 219 if (error_code & 4) { 220 if (exception_trace) { 221 dump_pagetable(address); 222 printk("%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", 223 current->comm, current->pid, address, regs->rip, 224 regs->rsp, error_code); 225 } 226 tsk->thread.cr2 = address; 227 tsk->thread.error_code = error_code; 228 tsk->thread.trap_no = 14; 229 info.si_signo = SIGSEGV; 230 info.si_errno = 0; 231 /* info.si_code has been set above */ 232 info.si_addr = (void *)address; 233 force_sig_info(SIGSEGV, &info, tsk); 234 return; 235 } 236 237no_context: 238 239 /* Are we prepared to handle this kernel fault? */ 240 if ((fixup = search_exception_table(regs->rip)) != 0) { 241 regs->rip = fixup; 242 if (0 && exception_trace) 243 printk(KERN_ERR 244 "%s: fixed kernel exception at %lx address %lx err:%ld\n", 245 current->comm, regs->rip, address, error_code); 246 return; 247 } 248 249/* 250 * Oops. The kernel tried to access some bad page. We'll have to 251 * terminate things with extreme prejudice. 252 */ 253 254 console_verbose(); 255 bust_spinlocks(1); 256 257 if (!in_interrupt()) { 258 if (!spin_trylock(&pcrash_lock)) { 259 if (crashing_cpu != smp_processor_id()) 260 spin_lock(&pcrash_lock); 261 } 262 crashing_cpu = smp_processor_id(); 263 } 264 265 if (address < PAGE_SIZE) 266 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); 267 else 268 printk(KERN_ALERT "Unable to handle kernel paging request"); 269 printk(" at virtual address %016lx\n",address); 270 printk(" printing rip:\n"); 271 printk("%016lx\n", regs->rip); 272 dump_pagetable(address); 273 274 die("Oops", regs, error_code); 275 276 if (!in_interrupt()) { 277 crashing_cpu = -1; /* small harmless window */ 278 spin_unlock(&pcrash_lock); 279 } 280 281 bust_spinlocks(0); 282 do_exit(SIGKILL); 283 284/* 285 * We ran out of memory, or some other thing happened to us that made 286 * us unable to handle the page fault gracefully. 287 */ 288out_of_memory: 289 up_read(&mm->mmap_sem); 290 if (current->pid == 1) { 291 tsk->policy |= SCHED_YIELD; 292 schedule(); 293 goto again; 294 } 295 printk("VM: killing process %s\n", tsk->comm); 296 if (error_code & 4) 297 do_exit(SIGKILL); 298 goto no_context; 299 300do_sigbus: 301 up_read(&mm->mmap_sem); 302 303 /* 304 * Send a sigbus, regardless of whether we were in kernel 305 * or user mode. 306 */ 307 tsk->thread.cr2 = address; 308 tsk->thread.error_code = error_code; 309 tsk->thread.trap_no = 14; 310 info.si_signo = SIGBUS; 311 info.si_errno = 0; 312 info.si_code = BUS_ADRERR; 313 info.si_addr = (void *)address; 314 force_sig_info(SIGBUS, &info, tsk); 315 316 /* Kernel mode? Handle exceptions or die */ 317 if (!(error_code & 4)) 318 goto no_context; 319 return; 320 321 322vmalloc_fault: 323 { 324 pgd_t *pgd; 325 pmd_t *pmd; 326 pte_t *pte; 327 328 /* 329 * x86-64 has the same kernel 3rd level pages for all CPUs. 330 * But for vmalloc/modules the TLB synchronization works lazily, 331 * so it can happen that we get a page fault for something 332 * that is really already in the page table. Just check if it 333 * is really there and when yes flush the local TLB. 334 */ 335 336 pgd = pgd_offset_k(address); 337 if (pgd != current_pgd_offset_k(address)) 338 goto bad_area_nosemaphore; 339 if (!pgd_present(*pgd)) 340 goto bad_area_nosemaphore; 341 pmd = pmd_offset(pgd, address); 342 if (!pmd_present(*pmd)) 343 goto bad_area_nosemaphore; 344 pte = pte_offset(pmd, address); 345 if (!pte_present(*pte)) 346 goto bad_area_nosemaphore; 347 348 __flush_tlb_all(); 349 return; 350 } 351} 352