trap.c revision 105996
1/*-
2 * Copyright (c) 2001, Jake Burkholder
3 * Copyright (C) 1994, David Greenman
4 * Copyright (c) 1990, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the University of Utah, and William Jolitz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed by the University of
21 *      California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
39 * 	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
40 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 105996 2002-10-26 17:38:20Z jake $
41 */
42
43#include "opt_ddb.h"
44#include "opt_ktr.h"
45#include "opt_ktrace.h"
46
47#include <sys/param.h>
48#include <sys/kernel.h>
49#include <sys/bus.h>
50#include <sys/interrupt.h>
51#include <sys/ktr.h>
52#include <sys/kse.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/systm.h>
56#include <sys/pioctl.h>
57#include <sys/proc.h>
58#include <sys/smp.h>
59#include <sys/syscall.h>
60#include <sys/sysctl.h>
61#include <sys/sysent.h>
62#include <sys/user.h>
63#include <sys/vmmeter.h>
64#ifdef KTRACE
65#include <sys/uio.h>
66#include <sys/ktrace.h>
67#endif
68
69#include <vm/vm.h>
70#include <vm/pmap.h>
71#include <vm/vm_extern.h>
72#include <vm/vm_param.h>
73#include <vm/vm_kern.h>
74#include <vm/vm_map.h>
75#include <vm/vm_page.h>
76
77#include <machine/clock.h>
78#include <machine/cpu.h>
79#include <machine/frame.h>
80#include <machine/intr_machdep.h>
81#include <machine/pcb.h>
82#include <machine/smp.h>
83#include <machine/trap.h>
84#include <machine/tstate.h>
85#include <machine/tte.h>
86#include <machine/tlb.h>
87#include <machine/tsb.h>
88#include <machine/watch.h>
89
/* Trap and system call handlers defined in this file. */
void	trap(struct trapframe *tf);
void	syscall(struct trapframe *tf);

/* Page fault worker used by both the user and kernel paths of trap(). */
static int	trap_pfault(struct thread *td, struct trapframe *tf);

/*
 * Bounds of the copy_nofault code range, and the address at which
 * execution is resumed when a fault is taken at a pc inside that range.
 */
extern char	copy_nofault_begin[];
extern char	copy_nofault_end[];
extern char	copy_fault[];

/*
 * Bounds of the fs_nofault and fs_nofault_intr code ranges.  A fault taken
 * at a pc inside either range is resumed at fs_fault.
 */
extern char	fs_nofault_begin[];
extern char	fs_nofault_end[];
extern char	fs_nofault_intr_begin[];
extern char	fs_nofault_intr_end[];
extern char	fs_fault[];

/* Syscall names, indexed by syscall number; used in trace/panic messages. */
extern char	*syscallnames[];
106
/*
 * Printable trap names.  The table is indexed by trap type with the
 * T_KERNEL flag masked off; the number in front of each entry is its index.
 */
const char *trap_msg[] = {
	/*  0 */ "reserved",
	/*  1 */ "instruction access exception",
	/*  2 */ "instruction access error",
	/*  3 */ "instruction access protection",
	/*  4 */ "illtrap instruction",
	/*  5 */ "illegal instruction",
	/*  6 */ "privileged opcode",
	/*  7 */ "floating point disabled",
	/*  8 */ "floating point exception ieee 754",
	/*  9 */ "floating point exception other",
	/* 10 */ "tag overflow",
	/* 11 */ "division by zero",
	/* 12 */ "data access exception",
	/* 13 */ "data access error",
	/* 14 */ "data access protection",
	/* 15 */ "memory address not aligned",
	/* 16 */ "privileged action",
	/* 17 */ "async data error",
	/* 18 */ "trap instruction 16",
	/* 19 */ "trap instruction 17",
	/* 20 */ "trap instruction 18",
	/* 21 */ "trap instruction 19",
	/* 22 */ "trap instruction 20",
	/* 23 */ "trap instruction 21",
	/* 24 */ "trap instruction 22",
	/* 25 */ "trap instruction 23",
	/* 26 */ "trap instruction 24",
	/* 27 */ "trap instruction 25",
	/* 28 */ "trap instruction 26",
	/* 29 */ "trap instruction 27",
	/* 30 */ "trap instruction 28",
	/* 31 */ "trap instruction 29",
	/* 32 */ "trap instruction 30",
	/* 33 */ "trap instruction 31",
	/* 34 */ "interrupt",
	/* 35 */ "physical address watchpoint",
	/* 36 */ "virtual address watchpoint",
	/* 37 */ "corrected ecc error",
	/* 38 */ "fast instruction access mmu miss",
	/* 39 */ "fast data access mmu miss",
	/* 40 */ "spill",
	/* 41 */ "fill",
	/* 42 */ "fill",
	/* 43 */ "breakpoint",
	/* 44 */ "clean window",
	/* 45 */ "range check",
	/* 46 */ "fix alignment",
	/* 47 */ "integer overflow",
	/* 48 */ "syscall",
	/* 49 */ "restore physical watchpoint",
	/* 50 */ "restore virtual watchpoint",
	/* 51 */ "kernel stack fault",
};
161
/*
 * Signal posted for each user-mode trap type, in the same order as the
 * trap name table.  An entry of -1 marks a trap type that has no signal;
 * trap() panics if such a type reaches its default user-mode case.
 */
const int trap_sig[] = {
	SIGILL,			/*  0: reserved */
	SIGILL,			/*  1: instruction access exception */
	SIGILL,			/*  2: instruction access error */
	SIGILL,			/*  3: instruction access protection */
	SIGILL,			/*  4: illtrap instruction */
	SIGILL,			/*  5: illegal instruction */
	SIGBUS,			/*  6: privileged opcode */
	SIGFPE,			/*  7: floating point disabled */
	SIGFPE,			/*  8: floating point exception ieee 754 */
	SIGFPE,			/*  9: floating point exception other */
	SIGEMT,			/* 10: tag overflow */
	SIGFPE,			/* 11: division by zero */
	SIGILL,			/* 12: data access exception */
	SIGILL,			/* 13: data access error */
	SIGBUS,			/* 14: data access protection */
	SIGBUS,			/* 15: memory address not aligned */
	SIGBUS,			/* 16: privileged action */
	SIGBUS,			/* 17: async data error */
	SIGILL,			/* 18: trap instruction 16 */
	SIGILL,			/* 19: trap instruction 17 */
	SIGILL,			/* 20: trap instruction 18 */
	SIGILL,			/* 21: trap instruction 19 */
	SIGILL,			/* 22: trap instruction 20 */
	SIGILL,			/* 23: trap instruction 21 */
	SIGILL,			/* 24: trap instruction 22 */
	SIGILL,			/* 25: trap instruction 23 */
	SIGILL,			/* 26: trap instruction 24 */
	SIGILL,			/* 27: trap instruction 25 */
	SIGILL,			/* 28: trap instruction 26 */
	SIGILL,			/* 29: trap instruction 27 */
	SIGILL,			/* 30: trap instruction 28 */
	SIGILL,			/* 31: trap instruction 29 */
	SIGILL,			/* 32: trap instruction 30 */
	SIGILL,			/* 33: trap instruction 31 */
	-1,			/* 34: interrupt */
	-1,			/* 35: physical address watchpoint */
	-1,			/* 36: virtual address watchpoint */
	-1,			/* 37: corrected ecc error */
	SIGSEGV,		/* 38: fast instruction access mmu miss */
	SIGSEGV,		/* 39: fast data access mmu miss */
	SIGILL,			/* 40: spill */
	SIGILL,			/* 41: fill */
	SIGILL,			/* 42: fill */
	SIGTRAP,		/* 43: breakpoint */
	SIGILL,			/* 44: clean window */
	SIGILL,			/* 45: range check */
	SIGILL,			/* 46: fix alignment */
	SIGILL,			/* 47: integer overflow */
	SIGSYS,			/* 48: syscall */
	-1,			/* 49: restore physical watchpoint */
	-1,			/* 50: restore virtual watchpoint */
	-1,			/* 51: kernel stack fault */
};
216
217CTASSERT(sizeof(struct trapframe) == 256);
218
219int debugger_on_signal = 0;
220SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW,
221    &debugger_on_signal, 0, "");
222
223void
224trap(struct trapframe *tf)
225{
226	struct thread *td;
227	struct proc *p;
228	u_int sticks;
229	int error;
230	int sig;
231
232	td = PCPU_GET(curthread);
233
234	CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td,
235	    trap_msg[tf->tf_type & ~T_KERNEL],
236	    (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil));
237
238	atomic_add_int(&cnt.v_trap, 1);
239
240	if ((tf->tf_tstate & TSTATE_PRIV) == 0) {
241		KASSERT(td != NULL, ("trap: curthread NULL"));
242		KASSERT(td->td_kse != NULL, ("trap: curkse NULL"));
243		KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));
244
245		p = td->td_proc;
246		sticks = td->td_kse->ke_sticks;
247		td->td_frame = tf;
248		if (td->td_ucred != p->p_ucred)
249			cred_update_thread(td);
250		if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
251			PROC_LOCK(p);
252			mtx_lock_spin(&sched_lock);
253			thread_exit();
254			/* NOTREACHED */
255		}
256
257		switch (tf->tf_type) {
258		case T_DATA_MISS:
259		case T_DATA_PROTECTION:
260		case T_INSTRUCTION_MISS:
261			sig = trap_pfault(td, tf);
262			break;
263		case T_FILL:
264			sig = rwindow_load(td, tf, 2);
265			break;
266		case T_FILL_RET:
267			sig = rwindow_load(td, tf, 1);
268			break;
269		case T_SPILL:
270			sig = rwindow_save(td);
271			break;
272		default:
273			if (tf->tf_type < 0 || tf->tf_type >= T_MAX ||
274			    trap_sig[tf->tf_type] == -1)
275				panic("trap: bad trap type");
276			sig = trap_sig[tf->tf_type];
277			break;
278		}
279
280		if (sig != 0) {
281			/* Translate fault for emulators. */
282			if (p->p_sysent->sv_transtrap != NULL) {
283				sig = p->p_sysent->sv_transtrap(sig,
284				    tf->tf_type);
285			}
286			if (debugger_on_signal &&
287			    (sig == 4 || sig == 10 || sig == 11))
288				Debugger("trapsig");
289			trapsignal(p, sig, tf->tf_type);
290		}
291
292		userret(td, tf, sticks);
293		mtx_assert(&Giant, MA_NOTOWNED);
294#ifdef DIAGNOSTIC
295		cred_free_thread(td);
296#endif
297 	} else {
298		KASSERT((tf->tf_type & T_KERNEL) != 0,
299		    ("trap: kernel trap isn't"));
300
301		switch (tf->tf_type & ~T_KERNEL) {
302#ifdef DDB
303		case T_BREAKPOINT:
304		case T_KSTACK_FAULT:
305			error = (kdb_trap(tf) == 0);
306			break;
307#ifdef notyet
308		case T_PA_WATCHPOINT:
309		case T_VA_WATCHPOINT:
310			error = db_watch_trap(tf);
311			break;
312#endif
313#endif
314		case T_DATA_MISS:
315		case T_DATA_PROTECTION:
316		case T_INSTRUCTION_MISS:
317			error = trap_pfault(td, tf);
318			break;
319		case T_DATA_EXCEPTION:
320		case T_MEM_ADDRESS_NOT_ALIGNED:
321			if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 &&
322			    MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) {
323				if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
324				    tf->tf_tpc <= (u_long)copy_nofault_end) {
325					tf->tf_tpc = (u_long)copy_fault;
326					tf->tf_tnpc = tf->tf_tpc + 4;
327					error = 0;
328					break;
329				}
330				if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
331				    tf->tf_tpc <= (u_long)fs_nofault_end) {
332					tf->tf_tpc = (u_long)fs_fault;
333					tf->tf_tnpc = tf->tf_tpc + 4;
334					error = 0;
335					break;
336				}
337			}
338			error = 1;
339			break;
340		default:
341			error = 1;
342			break;
343		}
344
345		if (error != 0)
346			panic("trap: %s", trap_msg[tf->tf_type & ~T_KERNEL]);
347	}
348	CTR1(KTR_TRAP, "trap: td=%p return", td);
349}
350
351static int
352trap_pfault(struct thread *td, struct trapframe *tf)
353{
354	struct vmspace *vm;
355	struct pcb *pcb;
356	struct proc *p;
357	vm_offset_t va;
358	vm_prot_t prot;
359	u_long ctx;
360	int flags;
361	int type;
362	int rv;
363
364	if (td == NULL)
365		return (-1);
366	KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL"));
367	KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL"));
368	KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL"));
369
370	p = td->td_proc;
371
372	rv = KERN_SUCCESS;
373	ctx = TLB_TAR_CTX(tf->tf_tar);
374	pcb = td->td_pcb;
375	type = tf->tf_type & ~T_KERNEL;
376	va = TLB_TAR_VA(tf->tf_tar);
377
378	CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx",
379	    td, p->p_vmspace->vm_pmap.pm_context[PCPU_GET(cpuid)], va, ctx);
380
381	if (type == T_DATA_PROTECTION) {
382		prot = VM_PROT_WRITE;
383		flags = VM_FAULT_DIRTY;
384	} else {
385		if (type == T_DATA_MISS)
386			prot = VM_PROT_READ;
387		else
388			prot = VM_PROT_READ | VM_PROT_EXECUTE;
389		flags = VM_FAULT_NORMAL;
390	}
391
392	if (ctx != TLB_CTX_KERNEL) {
393		if ((tf->tf_tstate & TSTATE_PRIV) != 0 &&
394		    (tf->tf_tpc >= (u_long)fs_nofault_intr_begin &&
395		     tf->tf_tpc <= (u_long)fs_nofault_intr_end)) {
396			tf->tf_tpc = (u_long)fs_fault;
397			tf->tf_tnpc = tf->tf_tpc + 4;
398			return (0);
399		}
400
401		/*
402		 * This is a fault on non-kernel virtual memory.
403		 */
404		vm = p->p_vmspace;
405
406		/*
407		 * Keep swapout from messing with us during this
408		 * critical time.
409		 */
410		PROC_LOCK(p);
411		++p->p_lock;
412		PROC_UNLOCK(p);
413
414		/* Fault in the user page. */
415		rv = vm_fault(&vm->vm_map, va, prot, flags);
416
417		/*
418		 * Now the process can be swapped again.
419		 */
420		PROC_LOCK(p);
421		--p->p_lock;
422		PROC_UNLOCK(p);
423	} else {
424		/*
425		 * This is a fault on kernel virtual memory.  Attempts to
426		 * access kernel memory from user mode cause privileged
427		 * action traps, not page fault.
428		 */
429		KASSERT(tf->tf_tstate & TSTATE_PRIV,
430		    ("trap_pfault: fault on nucleus context from user mode"));
431
432		/*
433		 * Don't have to worry about process locking or stacks in the
434		 * kernel.
435		 */
436		rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
437	}
438
439	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
440	    td, va, rv);
441	if (rv == KERN_SUCCESS)
442		return (0);
443	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
444		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
445		    tf->tf_tpc <= (u_long)fs_nofault_end) {
446			tf->tf_tpc = (u_long)fs_fault;
447			tf->tf_tnpc = tf->tf_tpc + 4;
448			return (0);
449		}
450		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
451		    tf->tf_tpc <= (u_long)copy_nofault_end) {
452			tf->tf_tpc = (u_long)copy_fault;
453			tf->tf_tnpc = tf->tf_tpc + 4;
454			return (0);
455		}
456	}
457	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
458}
459
460/* Maximum number of arguments that can be passed via the out registers. */
461#define	REG_MAXARGS	6
462
463/*
464 * Syscall handler. The arguments to the syscall are passed in the o registers
465 * by the caller, and are saved in the trap frame. The syscall number is passed
466 * in %g1 (and also saved in the trap frame).
467 */
468void
469syscall(struct trapframe *tf)
470{
471	struct sysent *callp;
472	struct thread *td;
473	register_t args[8];
474	register_t *argp;
475	struct proc *p;
476	u_int sticks;
477	u_long code;
478	u_long tpc;
479	int reg;
480	int regcnt;
481	int narg;
482	int error;
483
484	td = PCPU_GET(curthread);
485	KASSERT(td != NULL, ("trap: curthread NULL"));
486	KASSERT(td->td_kse != NULL, ("trap: curkse NULL"));
487	KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));
488
489	p = td->td_proc;
490
491	atomic_add_int(&cnt.v_syscall, 1);
492
493	narg = 0;
494	error = 0;
495	reg = 0;
496	regcnt = REG_MAXARGS;
497
498	sticks = td->td_kse->ke_sticks;
499	td->td_frame = tf;
500	if (td->td_ucred != p->p_ucred)
501		cred_update_thread(td);
502	if (p->p_flag & P_KSES)
503		thread_user_enter(p, td);
504	code = tf->tf_global[1];
505
506	/*
507	 * For syscalls, we don't want to retry the faulting instruction
508	 * (usually), instead we need to advance one instruction.
509	 */
510	tpc = tf->tf_tpc;
511	TF_DONE(tf);
512
513	if (p->p_sysent->sv_prepsyscall) {
514		/*
515		 * The prep code is MP aware.
516		 */
517#if 0
518		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
519#endif
520	} else 	if (code == SYS_syscall || code == SYS___syscall) {
521		code = tf->tf_out[reg++];
522		regcnt--;
523	}
524
525 	if (p->p_sysent->sv_mask)
526 		code &= p->p_sysent->sv_mask;
527
528 	if (code >= p->p_sysent->sv_size)
529 		callp = &p->p_sysent->sv_table[0];
530  	else
531 		callp = &p->p_sysent->sv_table[code];
532
533	narg = callp->sy_narg & SYF_ARGMASK;
534
535	if (narg <= regcnt) {
536		argp = &tf->tf_out[reg];
537		error = 0;
538	} else {
539		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
540		    ("Too many syscall arguments!"));
541		argp = args;
542		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
543		error = copyin((void *)(tf->tf_out[6] + SPOFF +
544		    offsetof(struct frame, fr_pad[6])),
545		    &args[regcnt], (narg - regcnt) * sizeof(args[0]));
546	}
547
548	CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td,
549	    syscallnames[code], argp[0], argp[1], argp[2]);
550
551	/*
552	 * Try to run the syscall without the MP lock if the syscall
553	 * is MP safe.
554	 */
555	if ((callp->sy_narg & SYF_MPSAFE) == 0)
556		mtx_lock(&Giant);
557
558#ifdef KTRACE
559	if (KTRPOINT(td, KTR_SYSCALL))
560		ktrsyscall(code, narg, argp);
561#endif
562	if (error == 0) {
563		td->td_retval[0] = 0;
564		td->td_retval[1] = 0;
565
566		STOPEVENT(p, S_SCE, narg);	/* MP aware */
567
568		error = (*callp->sy_call)(td, argp);
569
570		CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p,
571		    error, syscallnames[code], td->td_retval[0],
572		    td->td_retval[1]);
573	}
574
575	/*
576	 * MP SAFE (we may or may not have the MP lock at this point)
577	 */
578	switch (error) {
579	case 0:
580		tf->tf_out[0] = td->td_retval[0];
581		tf->tf_out[1] = td->td_retval[1];
582		tf->tf_tstate &= ~TSTATE_XCC_C;
583		break;
584
585	case ERESTART:
586		/*
587		 * Undo the tpc advancement we have done above, we want to
588		 * reexecute the system call.
589		 */
590		tf->tf_tpc = tpc;
591		tf->tf_tnpc -= 4;
592		break;
593
594	case EJUSTRETURN:
595		break;
596
597	default:
598 		if (p->p_sysent->sv_errsize) {
599 			if (error >= p->p_sysent->sv_errsize)
600  				error = -1;	/* XXX */
601   			else
602  				error = p->p_sysent->sv_errtbl[error];
603		}
604		tf->tf_out[0] = error;
605		tf->tf_tstate |= TSTATE_XCC_C;
606		break;
607	}
608
609	/*
610	 * Release Giant if we had to get it.  Don't use mtx_owned(),
611	 * we want to catch broken syscalls.
612	 */
613	if ((callp->sy_narg & SYF_MPSAFE) == 0)
614		mtx_unlock(&Giant);
615
616	/*
617	 * Handle reschedule and other end-of-syscall issues
618	 */
619	userret(td, tf, sticks);
620
621#ifdef KTRACE
622	if (KTRPOINT(td, KTR_SYSRET))
623		ktrsysret(code, error, td->td_retval[0]);
624#endif
625	/*
626	 * This works because errno is findable through the
627	 * register set.  If we ever support an emulation where this
628	 * is not the case, this code will need to be revisited.
629	 */
630	STOPEVENT(p, S_SCX, code);
631
632#ifdef DIAGNOSTIC
633	cred_free_thread(td);
634#endif
635#ifdef WITNESS
636	if (witness_list(td)) {
637		panic("system call %s returning with mutex(s) held\n",
638		    syscallnames[code]);
639	}
640#endif
641	mtx_assert(&sched_lock, MA_NOTOWNED);
642	mtx_assert(&Giant, MA_NOTOWNED);
643}
644