/*-
 * Copyright (c) 2001, Jake Burkholder
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
 * 	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 113833 2003-04-22 03:17:41Z davidxu $
 */

#include "opt_ddb.h"
#include "opt_ktr.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/trap.h>
#include <machine/tstate.h>
#include <machine/tte.h>
#include <machine/tlb.h>
#include <machine/tsb.h>
#include <machine/watch.h>

void trap(struct trapframe *tf);
void syscall(struct trapframe *tf);

static int trap_pfault(struct thread *td, struct trapframe *tf);

extern char copy_fault[];
extern char copy_nofault_begin[];
extern char copy_nofault_end[];

extern char fs_fault[];
extern char fs_nofault_begin[];
extern char fs_nofault_end[];
extern char fs_nofault_intr_begin[];
extern char fs_nofault_intr_end[];

extern char *syscallnames[];

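/*
 * Human-readable names for the hardware trap types, indexed by trap type
 * number (tf_type with the T_KERNEL flag masked off).  Used for KTR
 * tracing and panic messages.
 */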
const char *trap_msg[] = {
	"reserved",
	"instruction access exception",
	"instruction access error",
	"instruction access protection",
	"illtrap instruction",
	"illegal instruction",
	"privileged opcode",
	"floating point disabled",
	"floating point exception ieee 754",
	"floating point exception other",
	"tag overflow",
	"division by zero",
	"data access exception",
	"data access error",
	"data access protection",
	"memory address not aligned",
	"privileged action",
	"async data error",
	"trap instruction 16",
	"trap instruction 17",
	"trap instruction 18",
	"trap instruction 19",
	"trap instruction 20",
	"trap instruction 21",
	"trap instruction 22",
	"trap instruction 23",
	"trap instruction 24",
	"trap instruction 25",
	"trap instruction 26",
	"trap instruction 27",
	"trap instruction 28",
	"trap instruction 29",
	"trap instruction 30",
	"trap instruction 31",
	"interrupt",
	"physical address watchpoint",
	"virtual address watchpoint",
	"corrected ecc error",
	"fast instruction access mmu miss",
	"fast data access mmu miss",
	"spill",
	"fill",
	"fill",
	"breakpoint",
	"clean window",
	"range check",
	"fix alignment",
	"integer overflow",
	"syscall",
	"restore physical watchpoint",
	"restore virtual watchpoint",
	"kernel stack fault",
};

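/*
 * Signal to deliver for each trap type when the trap occurs in user mode.
 * An entry of -1 marks a trap that should never reach this handler from
 * user mode; such a trap causes a panic instead.
 */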
const int trap_sig[] = {
	SIGILL,			/* reserved */
	SIGILL,			/* instruction access exception */
	SIGILL,			/* instruction access error */
	SIGILL,			/* instruction access protection */
	SIGILL,			/* illtrap instruction */
	SIGILL,			/* illegal instruction */
	SIGBUS,			/* privileged opcode */
	SIGFPE,			/* floating point disabled */
	SIGFPE,			/* floating point exception ieee 754 */
	SIGFPE,			/* floating point exception other */
	SIGEMT,			/* tag overflow */
	SIGFPE,			/* division by zero */
	SIGILL,			/* data access exception */
	SIGILL,			/* data access error */
	SIGBUS,			/* data access protection */
	SIGBUS,			/* memory address not aligned */
	SIGBUS,			/* privileged action */
	SIGBUS,			/* async data error */
	SIGILL,			/* trap instruction 16 */
	SIGILL,			/* trap instruction 17 */
	SIGILL,			/* trap instruction 18 */
	SIGILL,			/* trap instruction 19 */
	SIGILL,			/* trap instruction 20 */
	SIGILL,			/* trap instruction 21 */
	SIGILL,			/* trap instruction 22 */
	SIGILL,			/* trap instruction 23 */
	SIGILL,			/* trap instruction 24 */
	SIGILL,			/* trap instruction 25 */
	SIGILL,			/* trap instruction 26 */
	SIGILL,			/* trap instruction 27 */
	SIGILL,			/* trap instruction 28 */
	SIGILL,			/* trap instruction 29 */
	SIGILL,			/* trap instruction 30 */
	SIGILL,			/* trap instruction 31 */
	-1,			/* interrupt */
	-1,			/* physical address watchpoint */
	-1,			/* virtual address watchpoint */
	-1,			/* corrected ecc error */
	SIGSEGV,		/* fast instruction access mmu miss */
	SIGSEGV,		/* fast data access mmu miss */
	SIGILL,			/* spill */
	SIGILL,			/* fill */
	SIGILL,			/* fill */
	SIGTRAP,		/* breakpoint */
	SIGILL,			/* clean window */
	SIGILL,			/* range check */
	SIGILL,			/* fix alignment */
	SIGILL,			/* integer overflow */
	SIGSYS,			/* syscall */
	-1,			/* restore physical watchpoint */
	-1,			/* restore virtual watchpoint */
	-1,			/* kernel stack fault */
};

CTASSERT(sizeof(struct trapframe) == 256);

int debugger_on_signal = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW,
    &debugger_on_signal, 0, "");

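/*
 * C entry point for traps other than fast TLB misses and interrupts,
 * called from the trap table code.  User-mode traps are handled as page
 * faults, register window fills/spills, or a signal; kernel-mode traps
 * are either recovered (debugger traps, page faults, nofault accesses to
 * user memory) or cause a panic.
 */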
void
trap(struct trapframe *tf)
{
	struct thread *td;
	struct proc *p;
	u_int sticks;
	int error;
	int sig;

	td = PCPU_GET(curthread);

	CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td,
	    trap_msg[tf->tf_type & ~T_KERNEL],
	    (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil));

	atomic_add_int(&cnt.v_trap, 1);

	if ((tf->tf_tstate & TSTATE_PRIV) == 0) {
		KASSERT(td != NULL, ("trap: curthread NULL"));
		KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));

		p = td->td_proc;
		sticks = td->td_sticks;
		td->td_frame = tf;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);

		switch (tf->tf_type) {
		case T_DATA_MISS:
		case T_DATA_PROTECTION:
		case T_INSTRUCTION_MISS:
			sig = trap_pfault(td, tf);
			break;
		case T_FILL:
			sig = rwindow_load(td, tf, 2);
			break;
		case T_FILL_RET:
			sig = rwindow_load(td, tf, 1);
			break;
		case T_SPILL:
			sig = rwindow_save(td);
			break;
		default:
			if (tf->tf_type < 0 || tf->tf_type >= T_MAX ||
			    trap_sig[tf->tf_type] == -1)
				panic("trap: bad trap type");
			sig = trap_sig[tf->tf_type];
			break;
		}

		if (sig != 0) {
			/* Translate fault for emulators. */
			if (p->p_sysent->sv_transtrap != NULL) {
				sig = p->p_sysent->sv_transtrap(sig,
				    tf->tf_type);
			}
			if (debugger_on_signal &&
			    (sig == 4 || sig == 10 || sig == 11))
				Debugger("trapsig");
			trapsignal(td, sig, tf->tf_type);
		}

		userret(td, tf, sticks);
		mtx_assert(&Giant, MA_NOTOWNED);
#ifdef DIAGNOSTIC
		cred_free_thread(td);
#endif
	} else {
		KASSERT((tf->tf_type & T_KERNEL) != 0,
		    ("trap: kernel trap isn't"));

		switch (tf->tf_type & ~T_KERNEL) {
#ifdef DDB
		case T_BREAKPOINT:
		case T_KSTACK_FAULT:
			error = (kdb_trap(tf) == 0);
			break;
#ifdef notyet
		case T_PA_WATCHPOINT:
		case T_VA_WATCHPOINT:
			error = db_watch_trap(tf);
			break;
#endif
#endif
		case T_DATA_MISS:
		case T_DATA_PROTECTION:
		case T_INSTRUCTION_MISS:
			error = trap_pfault(td, tf);
			break;
		case T_DATA_EXCEPTION:
		case T_MEM_ADDRESS_NOT_ALIGNED:
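			/*
			 * A fault on a user address (ASI_AIUP) taken while
			 * executing inside one of the marked nofault code
			 * sections is recovered by redirecting %tpc/%tnpc
			 * to the section's fault handler instead of
			 * panicing.
			 */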
			if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 &&
			    MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) {
				if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
				    tf->tf_tpc <= (u_long)copy_nofault_end) {
					tf->tf_tpc = (u_long)copy_fault;
					tf->tf_tnpc = tf->tf_tpc + 4;
					error = 0;
					break;
				}
				if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
				    tf->tf_tpc <= (u_long)fs_nofault_end) {
					tf->tf_tpc = (u_long)fs_fault;
					tf->tf_tnpc = tf->tf_tpc + 4;
					error = 0;
					break;
				}
			}
			error = 1;
			break;
		default:
			error = 1;
			break;
		}

		if (error != 0)
			panic("trap: %s", trap_msg[tf->tf_type & ~T_KERNEL]);
	}
	CTR1(KTR_TRAP, "trap: td=%p return", td);
}

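/*
 * Handle page fault style traps (data/instruction MMU misses and data
 * protection faults).  Returns 0 if the fault was resolved or recovered,
 * a signal number to deliver if it could not be resolved, or -1 if there
 * is no current thread.
 */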
static int
trap_pfault(struct thread *td, struct trapframe *tf)
{
	struct vmspace *vm;
	struct pcb *pcb;
	struct proc *p;
	vm_offset_t va;
	vm_prot_t prot;
	u_long ctx;
	int flags;
	int type;
	int rv;

	if (td == NULL)
		return (-1);
	KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL"));
	KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL"));
	KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL"));

	p = td->td_proc;

	rv = KERN_SUCCESS;
	ctx = TLB_TAR_CTX(tf->tf_tar);
	pcb = td->td_pcb;
	type = tf->tf_type & ~T_KERNEL;
	va = TLB_TAR_VA(tf->tf_tar);

	CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx",
	    td, p->p_vmspace->vm_pmap.pm_context[PCPU_GET(cpuid)], va, ctx);

	if (type == T_DATA_PROTECTION) {
		prot = VM_PROT_WRITE;
		flags = VM_FAULT_DIRTY;
	} else {
		if (type == T_DATA_MISS)
			prot = VM_PROT_READ;
		else
			prot = VM_PROT_READ | VM_PROT_EXECUTE;
		flags = VM_FAULT_NORMAL;
	}

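	/*
	 * A context of TLB_CTX_KERNEL means the fault is on a kernel
	 * virtual address; anything else is a fault on a user address,
	 * which may have been taken either from user mode or from the
	 * kernel while accessing user memory.
	 */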
	if (ctx != TLB_CTX_KERNEL) {
		if ((tf->tf_tstate & TSTATE_PRIV) != 0 &&
		    (tf->tf_tpc >= (u_long)fs_nofault_intr_begin &&
		     tf->tf_tpc <= (u_long)fs_nofault_intr_end)) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 */
		vm = p->p_vmspace;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);

		/* Fault in the user page. */
		rv = vm_fault(&vm->vm_map, va, prot, flags);

		/*
		 * Now the process can be swapped again.
		 */
		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	} else {
		/*
		 * This is a fault on kernel virtual memory.  Attempts to
		 * access kernel memory from user mode cause privileged
		 * action traps, not page faults.
		 */
		KASSERT(tf->tf_tstate & TSTATE_PRIV,
		    ("trap_pfault: fault on nucleus context from user mode"));

		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
	}

	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
	    td, va, rv);
	if (rv == KERN_SUCCESS)
		return (0);
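	/*
	 * The fault could not be resolved.  If it happened in the kernel
	 * while accessing a user address from within a nofault section,
	 * recover by branching to the section's fault handler; otherwise
	 * convert the VM error into a signal for the current thread.
	 */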
	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
		    tf->tf_tpc <= (u_long)fs_nofault_end) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
		    tf->tf_tpc <= (u_long)copy_nofault_end) {
			tf->tf_tpc = (u_long)copy_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
	}
	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/* Maximum number of arguments that can be passed via the out registers. */
#define	REG_MAXARGS	6

/*
 * Syscall handler. The arguments to the syscall are passed in the out
 * registers by the caller, and are saved in the trap frame. The syscall
 * number is passed in %g1 (and also saved in the trap frame).
 */
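/*
 * On return to user mode, the carry bit of the condition codes indicates
 * whether the system call failed: on success it is cleared and %o0/%o1
 * hold the return values; on failure it is set and %o0 holds the error
 * number (see the handling of the error value at the end of this
 * function).
 */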
void
syscall(struct trapframe *tf)
{
	struct sysent *callp;
	struct thread *td;
	register_t args[8];
	register_t *argp;
	struct proc *p;
	u_int sticks;
	u_long code;
	u_long tpc;
	int reg;
	int regcnt;
	int narg;
	int error;

	td = PCPU_GET(curthread);
	KASSERT(td != NULL, ("trap: curthread NULL"));
	KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));

	p = td->td_proc;

	atomic_add_int(&cnt.v_syscall, 1);

	narg = 0;
	error = 0;
	reg = 0;
	regcnt = REG_MAXARGS;

	sticks = td->td_sticks;
	td->td_frame = tf;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_THREADED)
		thread_user_enter(p, td);
	code = tf->tf_global[1];

	/*
	 * For syscalls, we don't want to retry the faulting instruction
	 * (usually); instead we need to advance one instruction.
	 */
	tpc = tf->tf_tpc;
	TF_DONE(tf);

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is MP aware.
		 */
#if 0
		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
#endif
	} else if (code == SYS_syscall || code == SYS___syscall) {
		code = tf->tf_out[reg++];
		regcnt--;
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

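	/*
	 * The first REG_MAXARGS arguments come in via %o0-%o5 and have
	 * already been saved in the trap frame; any remaining arguments
	 * are fetched from the caller's stack frame in user space.
	 */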
	if (narg <= regcnt) {
		argp = &tf->tf_out[reg];
		error = 0;
	} else {
		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
		    ("Too many syscall arguments!"));
		argp = args;
		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
		error = copyin((void *)(tf->tf_out[6] + SPOFF +
		    offsetof(struct frame, fr_pad[6])),
		    &args[regcnt], (narg - regcnt) * sizeof(args[0]));
	}

	CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td,
	    syscallnames[code], argp[0], argp[1], argp[2]);

	/*
	 * Try to run the syscall without the MP lock if the syscall
	 * is MP safe.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_lock(&Giant);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, narg, argp);
#endif
	if (error == 0) {
		td->td_retval[0] = 0;
		td->td_retval[1] = 0;

		STOPEVENT(p, S_SCE, narg);	/* MP aware */

		error = (*callp->sy_call)(td, argp);

		CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p,
		    error, syscallnames[code], td->td_retval[0],
		    td->td_retval[1]);
	}

	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
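	/*
	 * Map the result of the system call into the return registers and
	 * condition codes: on success the carry bit is cleared and the
	 * return values go into %o0/%o1; ERESTART rewinds %tpc/%tnpc so
	 * the trap instruction is re-executed; any other error sets the
	 * carry bit and places the (possibly translated) errno in %o0.
	 */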
	switch (error) {
	case 0:
		tf->tf_out[0] = td->td_retval[0];
		tf->tf_out[1] = td->td_retval[1];
		tf->tf_tstate &= ~TSTATE_XCC_C;
		break;

	case ERESTART:
		/*
		 * Undo the tpc advancement we have done above; we want to
		 * reexecute the system call.
		 */
		tf->tf_tpc = tpc;
		tf->tf_tnpc -= 4;
		break;

	case EJUSTRETURN:
		break;

	default:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		tf->tf_out[0] = error;
		tf->tf_tstate |= TSTATE_XCC_C;
		break;
	}

	/*
	 * Release Giant if we had to get it.  Don't use mtx_owned();
	 * we want to catch broken syscalls.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_unlock(&Giant);

	/*
	 * Handle reschedule and other end-of-syscall issues.
	 */
	userret(td, tf, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif
	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

#ifdef DIAGNOSTIC
	cred_free_thread(td);
#endif
	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    syscallnames[code]);
	mtx_assert(&sched_lock, MA_NOTOWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
}