/* trap.c, revision 82006 */
1/*- 2 * Copyright (c) 2001, Jake Burkholder 3 * Copyright (C) 1994, David Greenman 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the University of Utah, and William Jolitz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 39 * from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19 40 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 82006 2001-08-20 23:43:43Z jake $ 41 */ 42 43#include "opt_ddb.h" 44 45#include <sys/param.h> 46#include <sys/lock.h> 47#include <sys/mutex.h> 48#include <sys/systm.h> 49#include <sys/pioctl.h> 50#include <sys/proc.h> 51#include <sys/syscall.h> 52#include <sys/sysent.h> 53#include <sys/user.h> 54#include <sys/vmmeter.h> 55 56#include <vm/vm.h> 57#include <vm/pmap.h> 58#include <vm/vm_extern.h> 59#include <vm/vm_param.h> 60#include <vm/vm_kern.h> 61#include <vm/vm_map.h> 62#include <vm/vm_page.h> 63 64#include <machine/clock.h> 65#include <machine/frame.h> 66#include <machine/intr_machdep.h> 67#include <machine/pcb.h> 68#include <machine/pv.h> 69#include <machine/trap.h> 70#include <machine/tstate.h> 71#include <machine/tte.h> 72#include <machine/tlb.h> 73#include <machine/tsb.h> 74#include <machine/watch.h> 75 76void trap(struct trapframe *tf); 77int trap_mmu_fault(struct proc *p, struct trapframe *tf); 78void syscall(struct proc *p, struct trapframe *tf, u_int sticks); 79 80const char *trap_msg[] = { 81 "reserved", 82 "power on reset", 83 "watchdog reset", 84 "externally initiated reset", 85 "software initiated reset", 86 "red state exception", 87 "instruction access exception", 88 "instruction access error", 89 "illegal instruction", 90 "privileged opcode", 91 "floating point 
disabled", 92 "floating point exception ieee 754", 93 "floating point exception other", 94 "tag overflow", 95 "division by zero", 96 "data access exception", 97 "data access error", 98 "memory address not aligned", 99 "lddf memory address not aligned", 100 "stdf memory address not aligned", 101 "privileged action", 102 "interrupt vector", 103 "physical address watchpoint", 104 "virtual address watchpoint", 105 "corrected ecc error", 106 "fast instruction access mmu miss", 107 "fast data access mmu miss", 108 "fast data access protection", 109 "clock", 110 "bad spill", 111 "bad fill", 112 "breakpoint", 113 "syscall", 114}; 115 116void 117trap(struct trapframe *tf) 118{ 119 u_int sticks; 120 struct proc *p; 121 int error; 122 int ucode; 123 int type; 124 int sig; 125 int mask; 126 127 KASSERT(PCPU_GET(curproc) != NULL, ("trap: curproc NULL")); 128 KASSERT(PCPU_GET(curpcb) != NULL, ("trap: curpcb NULL")); 129 130 p = PCPU_GET(curproc); 131 type = T_TYPE(tf->tf_type); 132 ucode = type; /* XXX */ 133 134 if ((type & T_KERNEL) == 0) 135 sticks = p->p_sticks; 136 137 switch (type) { 138 case T_FP_DISABLED: 139 if (fp_enable_proc(p)) 140 goto user; 141 else { 142 sig = SIGFPE; 143 goto trapsig; 144 } 145 break; 146 case T_IMMU_MISS: 147 case T_DMMU_MISS: 148 case T_DMMU_PROT: 149 mtx_lock(&Giant); 150 error = trap_mmu_fault(p, tf); 151 mtx_unlock(&Giant); 152 if (error == 0) 153 goto user; 154 break; 155 case T_INTR: 156 intr_dispatch(T_LEVEL(tf->tf_type), tf); 157 goto user; 158 case T_SYSCALL: 159 /* syscall() calls userret(), so we need goto out; */ 160 syscall(p, tf, sticks); 161 goto out; 162#ifdef DDB 163 case T_BREAKPOINT | T_KERNEL: 164 if (kdb_trap(tf) != 0) 165 goto out; 166 break; 167#endif 168 case T_WATCH_VIRT | T_KERNEL: 169 /* 170 * At the moment, just print the information from the trap, 171 * remove the watchpoint, use evil magic to execute the 172 * instruction (we temporarily save the instruction at 173 * %tnpc, write a trap instruction, resume, and 
reset the 174 * watch point when the trap arrives). 175 * To make sure that no interrupt gets in between and creates 176 * a potentially large window where the watchpoint is inactive, 177 * disable interrupts temporarily. 178 * This is obviously fragile and evilish. 179 */ 180 printf("Virtual watchpoint triggered, tpc=0x%lx, tnpc=0x%lx\n", 181 tf->tf_tpc, tf->tf_tnpc); 182 PCPU_SET(wp_pstate, (tf->tf_tstate & TSTATE_PSTATE_MASK) >> 183 TSTATE_PSTATE_SHIFT); 184 tf->tf_tstate &= ~TSTATE_IE; 185 wrpr(pstate, rdpr(pstate), PSTATE_IE); 186 PCPU_SET(wp_insn, *((u_int *)tf->tf_tnpc)); 187 *((u_int *)tf->tf_tnpc) = 0x91d03002; /* ta %xcc, 2 */ 188 flush(tf->tf_tnpc); 189 PCPU_SET(wp_va, watch_virt_get(&mask)); 190 PCPU_SET(wp_mask, mask); 191 watch_virt_clear(); 192 goto out; 193 case T_RESTOREWP | T_KERNEL: 194 /* 195 * Undo the tweaks tone for T_WATCH, reset the watch point and 196 * contunue execution. 197 * Note that here, we run with interrupts enabled, so there 198 * is a small chance that we will be interrupted before we 199 * could reset the watch point. 200 */ 201 tf->tf_tstate = (tf->tf_tstate & ~TSTATE_PSTATE_MASK) | 202 PCPU_GET(wp_pstate) << TSTATE_PSTATE_SHIFT; 203 watch_virt_set_mask(PCPU_GET(wp_va), PCPU_GET(wp_mask)); 204 *(u_int *)tf->tf_tpc = PCPU_GET(wp_insn); 205 flush(tf->tf_tpc); 206 goto out; 207 case T_DMMU_MISS | T_KERNEL: 208 case T_DMMU_PROT | T_KERNEL: 209 mtx_lock(&Giant); 210 error = trap_mmu_fault(p, tf); 211 mtx_unlock(&Giant); 212 if (error == 0) 213 goto out; 214 break; 215 case T_INTR | T_KERNEL: 216 intr_dispatch(T_LEVEL(tf->tf_type), tf); 217 goto out; 218 default: 219 break; 220 } 221 panic("trap: %s", trap_msg[type & ~T_KERNEL]); 222 223trapsig: 224 mtx_lock(&Giant); 225 /* Translate fault for emulators. 
*/ 226 if (p->p_sysent->sv_transtrap != NULL) 227 sig = (p->p_sysent->sv_transtrap)(sig, type); 228 229 trapsignal(p, sig, ucode); 230 mtx_unlock(&Giant); 231user: 232 userret(p, tf, sticks); 233 if (mtx_owned(&Giant)) 234 mtx_unlock(&Giant); 235out: 236 return; 237} 238 239int 240trap_mmu_fault(struct proc *p, struct trapframe *tf) 241{ 242 struct mmuframe *mf; 243 struct vmspace *vm; 244 vm_offset_t va; 245 vm_prot_t type; 246 int rv; 247 248 KASSERT(p->p_vmspace != NULL, ("trap_dmmu_miss: vmspace NULL")); 249 250 type = 0; 251 rv = KERN_FAILURE; 252 mf = tf->tf_arg; 253 va = TLB_TAR_VA(mf->mf_tar); 254 switch (tf->tf_type) { 255 case T_DMMU_MISS | T_KERNEL: 256 /* 257 * If the context is nucleus this is a soft fault on kernel 258 * memory, just fault in the pages. 259 */ 260 if (TLB_TAR_CTX(mf->mf_tar) == TLB_CTX_KERNEL) { 261 rv = vm_fault(kernel_map, va, VM_PROT_READ, 262 VM_FAULT_NORMAL); 263 break; 264 } 265 266 /* 267 * Don't allow kernel mode faults on user memory unless 268 * pcb_onfault is set. 269 */ 270 if (PCPU_GET(curpcb)->pcb_onfault == NULL) 271 break; 272 /* Fallthrough. */ 273 case T_IMMU_MISS: 274 case T_DMMU_MISS: 275 /* 276 * First try the tsb. The primary tsb was already searched. 277 */ 278 vm = p->p_vmspace; 279 if (tsb_miss(&vm->vm_pmap, tf->tf_type, mf) == 0) { 280 rv = KERN_SUCCESS; 281 break; 282 } 283 284 /* 285 * Not found, call the vm system. 286 */ 287 288 if (tf->tf_type == T_IMMU_MISS) 289 type = VM_PROT_EXECUTE | VM_PROT_READ; 290 else 291 type = VM_PROT_READ; 292 293 /* 294 * Keep the process from being swapped out at this critical 295 * time. 296 */ 297 PROC_LOCK(p); 298 ++p->p_lock; 299 PROC_UNLOCK(p); 300 301 /* 302 * Grow the stack if necessary. vm_map_growstack only fails 303 * if the va falls into a growable stack region and the stack 304 * growth fails. If it succeeds, or the va was not within a 305 * growable stack region, fault in the user page. 
306 */ 307 if (vm_map_growstack(p, va) != KERN_SUCCESS) 308 rv = KERN_FAILURE; 309 else 310 rv = vm_fault(&vm->vm_map, va, type, VM_FAULT_NORMAL); 311 312 /* 313 * Now the process can be swapped again. 314 */ 315 PROC_LOCK(p); 316 --p->p_lock; 317 PROC_UNLOCK(p); 318 break; 319 case T_DMMU_PROT | T_KERNEL: 320 /* 321 * Protection faults should not happen on kernel memory. 322 */ 323 if (TLB_TAR_CTX(mf->mf_tar) == TLB_CTX_KERNEL) 324 break; 325 326 /* 327 * Don't allow kernel mode faults on user memory unless 328 * pcb_onfault is set. 329 */ 330 if (PCPU_GET(curpcb)->pcb_onfault == NULL) 331 break; 332 /* Fallthrough. */ 333 case T_DMMU_PROT: 334 /* 335 * Only look in the tsb. Write access to an unmapped page 336 * causes a miss first, so the page must have already been 337 * brought in by vm_fault, we just need to find the tte and 338 * update the write bit. XXX How do we tell them vm system 339 * that we are now writing? 340 */ 341 vm = p->p_vmspace; 342 if (tsb_miss(&vm->vm_pmap, tf->tf_type, mf) == 0) 343 rv = KERN_SUCCESS; 344 break; 345 default: 346 break; 347 } 348 if (rv == KERN_SUCCESS) 349 return (0); 350 if (tf->tf_type & T_KERNEL) { 351 if (PCPU_GET(curpcb)->pcb_onfault != NULL && 352 TLB_TAR_CTX(mf->mf_tar) != TLB_CTX_KERNEL) { 353 tf->tf_tpc = (u_long)PCPU_GET(curpcb)->pcb_onfault; 354 tf->tf_tnpc = tf->tf_tpc + 4; 355 return (0); 356 } 357 } 358 return (rv == KERN_PROTECTION_FAILURE ? SIGBUS : SIGSEGV); 359} 360 361/* Maximum number of arguments that can be passed via the out registers. */ 362#define REG_MAXARGS 6 363 364/* 365 * Syscall handler. The arguments to the syscall are passed in the o registers 366 * by the caller, and are saved in the trap frame. The syscall number is passed 367 * in %g1 (and also saved in the trap frame). 
368 */ 369void 370syscall(struct proc *p, struct trapframe *tf, u_int sticks) 371{ 372 struct sysent *callp; 373 u_long code; 374 u_long tpc; 375 int reg; 376 int regcnt; 377 int narg; 378 int error; 379 register_t args[8]; 380 void *argp; 381 382 narg = 0; 383 error = 0; 384 reg = 0; 385 regcnt = REG_MAXARGS; 386 code = tf->tf_global[1]; 387 atomic_add_int(&cnt.v_syscall, 1); 388 /* 389 * For syscalls, we don't want to retry the faulting instruction 390 * (usually), instead we need to advance one instruction. 391 */ 392 tpc = tf->tf_tpc; 393 tf->tf_tpc = tf->tf_tnpc; 394 tf->tf_tnpc += 4; 395 396 if (p->p_sysent->sv_prepsyscall) { 397 /* 398 * The prep code is not MP aware. 399 */ 400#if 0 401 mtx_lock(&Giant); 402 (*p->p_sysent->sv_prepsyscall)(tf, args, &code, ¶ms); 403 mtx_unlock(&Giant); 404#endif 405 } else if (code == SYS_syscall || code == SYS___syscall) { 406 code = tf->tf_out[reg++]; 407 regcnt--; 408 } 409 410 if (p->p_sysent->sv_mask) 411 code &= p->p_sysent->sv_mask; 412 413 if (code >= p->p_sysent->sv_size) 414 callp = &p->p_sysent->sv_table[0]; 415 else 416 callp = &p->p_sysent->sv_table[code]; 417 418 narg = callp->sy_narg & SYF_ARGMASK; 419 420 if (narg <= regcnt) 421 argp = &tf->tf_out[reg]; 422 else { 423 KASSERT(narg <= sizeof(args) / sizeof(args[0]), 424 ("Too many syscall arguments!")); 425 argp = args; 426 bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt); 427 error = copyin((void *)(tf->tf_out[6] + SPOFF + 428 offsetof(struct frame, f_pad[6])), 429 &args[reg + regcnt], (narg - regcnt) * sizeof(args[0])); 430 if (error != 0) 431 goto bad; 432 } 433 434 /* 435 * Try to run the syscall without the MP lock if the syscall 436 * is MP safe. 
437 */ 438 if ((callp->sy_narg & SYF_MPSAFE) == 0) 439 mtx_lock(&Giant); 440 441#ifdef KTRACE 442 /* 443 * We have to obtain the MP lock no matter what if 444 * we are ktracing 445 */ 446 if (KTRPOINT(p, KTR_SYSCALL)) { 447 if (!mtx_owned(&Giant)) 448 mtx_lock(&Giant); 449 ktrsyscall(p->p_tracep, code, narg, args); 450 } 451#endif 452 p->p_retval[0] = 0; 453 p->p_retval[1] = tf->tf_out[1]; 454 455 STOPEVENT(p, S_SCE, narg); /* MP aware */ 456 457 error = (*callp->sy_call)(p, argp); 458 459 /* 460 * MP SAFE (we may or may not have the MP lock at this point) 461 */ 462 switch (error) { 463 case 0: 464 tf->tf_out[0] = p->p_retval[0]; 465 tf->tf_out[1] = p->p_retval[1]; 466 tf->tf_tstate &= ~TSTATE_XCC_C; 467 break; 468 469 case ERESTART: 470 /* 471 * Undo the tpc advancement we have done above, we want to 472 * reexecute the system call. 473 */ 474 tf->tf_tpc = tpc; 475 tf->tf_tnpc -= 4; 476 break; 477 478 case EJUSTRETURN: 479 break; 480 481 default: 482bad: 483 if (p->p_sysent->sv_errsize) { 484 if (error >= p->p_sysent->sv_errsize) 485 error = -1; /* XXX */ 486 else 487 error = p->p_sysent->sv_errtbl[error]; 488 } 489 tf->tf_out[0] = error; 490 tf->tf_tstate |= TSTATE_XCC_C; 491 break; 492 } 493 494 /* 495 * Handle reschedule and other end-of-syscall issues 496 */ 497 userret(p, tf, sticks); 498 499#ifdef KTRACE 500 if (KTRPOINT(p, KTR_SYSRET)) { 501 if (!mtx_owned(&Giant)) 502 mtx_lock(&Giant); 503 ktrsysret(p->p_tracep, code, error, p->p_retval[0]); 504 } 505#endif 506 507 /* 508 * Release Giant if we had to get it 509 */ 510 if (mtx_owned(&Giant)) 511 mtx_unlock(&Giant); 512 513 /* 514 * This works because errno is findable through the 515 * register set. If we ever support an emulation where this 516 * is not the case, this code will need to be revisited. 
517 */ 518 STOPEVENT(p, S_SCX, code); 519 520#ifdef WITNESS 521 if (witness_list(p)) { 522 panic("system call %s returning with mutex(s) held\n", 523 syscallnames[code]); 524 } 525#endif 526 mtx_assert(&sched_lock, MA_NOTOWNED); 527 mtx_assert(&Giant, MA_NOTOWNED); 528 529} 530