/*-
 * Copyright (c) 2001, Jake Burkholder
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
 * 	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 120965 2003-10-10 10:31:48Z robert $
 */

#include "opt_ddb.h"
#include "opt_ktr.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/trap.h>
#include <machine/tstate.h>
#include <machine/tte.h>
#include <machine/tlb.h>
#include <machine/tsb.h>
#include <machine/watch.h>

void trap(struct trapframe *tf);
void syscall(struct trapframe *tf);

static int trap_pfault(struct thread *td, struct trapframe *tf);

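/*
 * The *_nofault_begin/*_nofault_end labels below bracket sections of the
 * copy, fs and fas access routines in which faults are tolerated.  When a
 * fault is taken with %tpc inside one of these ranges, the handlers below
 * vector execution to the matching *_fault label instead of delivering a
 * signal or panicking.
 */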
extern char copy_fault[];
extern char copy_nofault_begin[];
extern char copy_nofault_end[];

extern char fs_fault[];
extern char fs_nofault_begin[];
extern char fs_nofault_end[];
extern char fs_nofault_intr_begin[];
extern char fs_nofault_intr_end[];

extern char fas_fault[];
extern char fas_nofault_begin[];
extern char fas_nofault_end[];

extern char *syscallnames[];

const char *trap_msg[] = {
	"reserved",
	"instruction access exception",
	"instruction access error",
	"instruction access protection",
	"illtrap instruction",
	"illegal instruction",
	"privileged opcode",
	"floating point disabled",
	"floating point exception ieee 754",
	"floating point exception other",
	"tag overflow",
	"division by zero",
	"data access exception",
	"data access error",
	"data access protection",
	"memory address not aligned",
	"privileged action",
	"async data error",
	"trap instruction 16",
	"trap instruction 17",
	"trap instruction 18",
	"trap instruction 19",
	"trap instruction 20",
	"trap instruction 21",
	"trap instruction 22",
	"trap instruction 23",
	"trap instruction 24",
	"trap instruction 25",
	"trap instruction 26",
	"trap instruction 27",
	"trap instruction 28",
	"trap instruction 29",
	"trap instruction 30",
	"trap instruction 31",
	"fast instruction access mmu miss",
	"fast data access mmu miss",
	"interrupt",
	"physical address watchpoint",
	"virtual address watchpoint",
	"corrected ecc error",
	"spill",
	"fill",
	"fill",
	"breakpoint",
	"clean window",
	"range check",
	"fix alignment",
	"integer overflow",
	"syscall",
	"restore physical watchpoint",
	"restore virtual watchpoint",
	"kernel stack fault",
};

const int trap_sig[] = {
	SIGILL,			/* reserved */
	SIGILL,			/* instruction access exception */
	SIGILL,			/* instruction access error */
	SIGILL,			/* instruction access protection */
	SIGILL,			/* illtrap instruction */
	SIGILL,			/* illegal instruction */
	SIGBUS,			/* privileged opcode */
	SIGFPE,			/* floating point disabled */
	SIGFPE,			/* floating point exception ieee 754 */
	SIGFPE,			/* floating point exception other */
	SIGEMT,			/* tag overflow */
	SIGFPE,			/* division by zero */
	SIGILL,			/* data access exception */
	SIGILL,			/* data access error */
	SIGBUS,			/* data access protection */
	SIGBUS,			/* memory address not aligned */
	SIGBUS,			/* privileged action */
	SIGBUS,			/* async data error */
	SIGILL,			/* trap instruction 16 */
	SIGILL,			/* trap instruction 17 */
	SIGILL,			/* trap instruction 18 */
	SIGILL,			/* trap instruction 19 */
	SIGILL,			/* trap instruction 20 */
	SIGILL,			/* trap instruction 21 */
	SIGILL,			/* trap instruction 22 */
	SIGILL,			/* trap instruction 23 */
	SIGILL,			/* trap instruction 24 */
	SIGILL,			/* trap instruction 25 */
	SIGILL,			/* trap instruction 26 */
	SIGILL,			/* trap instruction 27 */
	SIGILL,			/* trap instruction 28 */
	SIGILL,			/* trap instruction 29 */
	SIGILL,			/* trap instruction 30 */
	SIGILL,			/* trap instruction 31 */
	SIGSEGV,		/* fast instruction access mmu miss */
	SIGSEGV,		/* fast data access mmu miss */
	-1,			/* interrupt */
	-1,			/* physical address watchpoint */
	-1,			/* virtual address watchpoint */
	-1,			/* corrected ecc error */
	SIGILL,			/* spill */
	SIGILL,			/* fill */
	SIGILL,			/* fill */
	SIGTRAP,		/* breakpoint */
	SIGILL,			/* clean window */
	SIGILL,			/* range check */
	SIGILL,			/* fix alignment */
	SIGILL,			/* integer overflow */
	SIGSYS,			/* syscall */
	-1,			/* restore physical watchpoint */
	-1,			/* restore virtual watchpoint */
	-1,			/* kernel stack fault */
};

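/*
 * The low-level trap entry code depends on the trapframe layout; catch
 * size changes at compile time.
 */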
CTASSERT(sizeof(struct trapframe) == 256);

int debugger_on_signal = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW,
    &debugger_on_signal, 0, "");

void
trap(struct trapframe *tf)
{
	struct thread *td;
	struct proc *p;
	u_int sticks;
	int error;
	int sig;

	td = PCPU_GET(curthread);

	CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td,
	    trap_msg[tf->tf_type & ~T_KERNEL],
	    (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil));

	atomic_add_int(&cnt.v_trap, 1);

	if ((tf->tf_tstate & TSTATE_PRIV) == 0) {
		KASSERT(td != NULL, ("trap: curthread NULL"));
		KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));

		p = td->td_proc;
		sticks = td->td_sticks;
		td->td_frame = tf;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);

		switch (tf->tf_type) {
		case T_DATA_MISS:
		case T_DATA_PROTECTION:
		case T_INSTRUCTION_MISS:
			sig = trap_pfault(td, tf);
			break;
		case T_FILL:
			sig = rwindow_load(td, tf, 2);
			break;
		case T_FILL_RET:
			sig = rwindow_load(td, tf, 1);
			break;
		case T_SPILL:
			sig = rwindow_save(td);
			break;
		default:
			if (tf->tf_type < 0 || tf->tf_type >= T_MAX ||
			    trap_sig[tf->tf_type] == -1)
				panic("trap: bad trap type");
			sig = trap_sig[tf->tf_type];
			break;
		}

		if (sig != 0) {
			/* Translate fault for emulators. */
			if (p->p_sysent->sv_transtrap != NULL) {
				sig = p->p_sysent->sv_transtrap(sig,
				    tf->tf_type);
			}
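			/* 4, 10 and 11 are SIGILL, SIGBUS and SIGSEGV. */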
			if (debugger_on_signal &&
			    (sig == 4 || sig == 10 || sig == 11))
				Debugger("trapsig");
			trapsignal(td, sig, tf->tf_type);
		}

		userret(td, tf, sticks);
		mtx_assert(&Giant, MA_NOTOWNED);
#ifdef DIAGNOSTIC
		cred_free_thread(td);
#endif
	} else {
		KASSERT((tf->tf_type & T_KERNEL) != 0,
		    ("trap: kernel trap isn't"));

		switch (tf->tf_type & ~T_KERNEL) {
#ifdef DDB
		case T_BREAKPOINT:
		case T_KSTACK_FAULT:
			error = (kdb_trap(tf) == 0);
			break;
#ifdef notyet
		case T_PA_WATCHPOINT:
		case T_VA_WATCHPOINT:
			error = db_watch_trap(tf);
			break;
#endif
#endif
		case T_DATA_MISS:
		case T_DATA_PROTECTION:
		case T_INSTRUCTION_MISS:
			error = trap_pfault(td, tf);
			break;
		case T_DATA_EXCEPTION:
		case T_MEM_ADDRESS_NOT_ALIGNED:
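			/*
			 * If the fault status is valid and the access was
			 * made with the user primary ASI (a kernel access to
			 * a user address on behalf of the copy or fs
			 * routines), recover by vectoring to the matching
			 * fault label instead of panicking.
			 */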
			if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 &&
			    MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) {
				if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
				    tf->tf_tpc <= (u_long)copy_nofault_end) {
					tf->tf_tpc = (u_long)copy_fault;
					tf->tf_tnpc = tf->tf_tpc + 4;
					error = 0;
					break;
				}
				if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
				    tf->tf_tpc <= (u_long)fs_nofault_end) {
					tf->tf_tpc = (u_long)fs_fault;
					tf->tf_tnpc = tf->tf_tpc + 4;
					error = 0;
					break;
				}
			}
			error = 1;
			break;
		case T_DATA_ERROR:
			/*
			 * handle PCI poke/peek as per UltraSPARC IIi
			 * User's Manual 16.2.1.
			 */
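			/* 0x8143e040 is the encoding of "membar #Sync". */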
#define MEMBARSYNC_INST	((u_int32_t)0x8143e040)
			if (tf->tf_tpc > (u_long)fas_nofault_begin &&
			    tf->tf_tpc < (u_long)fas_nofault_end &&
			    *(u_int32_t *)tf->tf_tpc == MEMBARSYNC_INST &&
			    ((u_int32_t *)tf->tf_tpc)[-2] == MEMBARSYNC_INST) {
				tf->tf_tpc = (u_long)fas_fault;
				tf->tf_tnpc = tf->tf_tpc + 4;
				error = 0;
				break;
			}
#undef MEMBARSYNC_INST
			error = 1;
			break;
		default:
			error = 1;
			break;
		}

		if (error != 0)
			panic("trap: %s", trap_msg[tf->tf_type & ~T_KERNEL]);
	}
	CTR1(KTR_TRAP, "trap: td=%p return", td);
}

static int
trap_pfault(struct thread *td, struct trapframe *tf)
{
	struct vmspace *vm;
	struct pcb *pcb;
	struct proc *p;
	vm_offset_t va;
	vm_prot_t prot;
	u_long ctx;
	int flags;
	int type;
	int rv;

	if (td == NULL)
		return (-1);
	KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL"));
	KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL"));
	KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL"));

	p = td->td_proc;

	rv = KERN_SUCCESS;
	ctx = TLB_TAR_CTX(tf->tf_tar);
	pcb = td->td_pcb;
	type = tf->tf_type & ~T_KERNEL;
	va = TLB_TAR_VA(tf->tf_tar);

	CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx",
	    td, p->p_vmspace->vm_pmap.pm_context[PCPU_GET(cpuid)], va, ctx);

	if (type == T_DATA_PROTECTION) {
		prot = VM_PROT_WRITE;
		flags = VM_FAULT_DIRTY;
	} else {
		if (type == T_DATA_MISS)
			prot = VM_PROT_READ;
		else
			prot = VM_PROT_READ | VM_PROT_EXECUTE;
		flags = VM_FAULT_NORMAL;
	}

	if (ctx != TLB_CTX_KERNEL) {
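		/*
		 * A privileged fault with %tpc inside the fs_nofault_intr
		 * section is never serviced by vm_fault(); vector straight
		 * to fs_fault so the access simply fails.
		 */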
		if ((tf->tf_tstate & TSTATE_PRIV) != 0 &&
		    (tf->tf_tpc >= (u_long)fs_nofault_intr_begin &&
		     tf->tf_tpc <= (u_long)fs_nofault_intr_end)) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 */
		vm = p->p_vmspace;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);

		/* Fault in the user page. */
		rv = vm_fault(&vm->vm_map, va, prot, flags);

		/*
		 * Now the process can be swapped again.
		 */
		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	} else {
		/*
		 * This is a fault on kernel virtual memory.  Attempts to
		 * access kernel memory from user mode cause privileged
		 * action traps, not page faults.
		 */
		KASSERT(tf->tf_tstate & TSTATE_PRIV,
		    ("trap_pfault: fault on nucleus context from user mode"));

		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
	}

	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
	    td, va, rv);
	if (rv == KERN_SUCCESS)
		return (0);
	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
		    tf->tf_tpc <= (u_long)fs_nofault_end) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
		    tf->tf_tpc <= (u_long)copy_nofault_end) {
			tf->tf_tpc = (u_long)copy_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
	}
	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/* Maximum number of arguments that can be passed via the out registers. */
#define	REG_MAXARGS	6

/*
 * Syscall handler. The arguments to the syscall are passed in the o registers
 * by the caller, and are saved in the trap frame. The syscall number is passed
 * in %g1 (and also saved in the trap frame).
 */
void
syscall(struct trapframe *tf)
{
	struct sysent *callp;
	struct thread *td;
	register_t args[8];
	register_t *argp;
	struct proc *p;
	u_int sticks;
	u_long code;
	u_long tpc;
	int reg;
	int regcnt;
	int narg;
	int error;

	td = PCPU_GET(curthread);
	KASSERT(td != NULL, ("trap: curthread NULL"));
	KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));

	p = td->td_proc;

	atomic_add_int(&cnt.v_syscall, 1);

	narg = 0;
	error = 0;
	reg = 0;
	regcnt = REG_MAXARGS;

	sticks = td->td_sticks;
	td->td_frame = tf;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_SA)
		thread_user_enter(p, td);
	code = tf->tf_global[1];

	/*
	 * For syscalls, we don't want to retry the faulting instruction
	 * (usually); instead, we need to advance one instruction.
	 */
	tpc = tf->tf_tpc;
	TF_DONE(tf);

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is MP aware.
		 */
#if 0
		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
#endif
	} else if (code == SYS_syscall || code == SYS___syscall) {
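		/*
		 * Indirect syscall; the real syscall number is passed as the
		 * first argument, so shift the argument registers by one.
		 */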
		code = tf->tf_out[reg++];
		regcnt--;
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

	if (narg <= regcnt) {
		argp = &tf->tf_out[reg];
		error = 0;
	} else {
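		/*
		 * More arguments than fit in the out registers; the first
		 * REG_MAXARGS come from the trap frame and the rest are
		 * copied in from the caller's stack frame.
		 */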
		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
		    ("Too many syscall arguments!"));
		argp = args;
		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
		error = copyin((void *)(tf->tf_out[6] + SPOFF +
		    offsetof(struct frame, fr_pad[6])),
		    &args[regcnt], (narg - regcnt) * sizeof(args[0]));
	}

	CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td,
	    syscallnames[code], argp[0], argp[1], argp[2]);

	/*
	 * Try to run the syscall without the MP lock if the syscall
	 * is MP safe.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_lock(&Giant);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, narg, argp);
#endif
	if (error == 0) {
		td->td_retval[0] = 0;
		td->td_retval[1] = 0;

		STOPEVENT(p, S_SCE, narg);	/* MP aware */

		PTRACESTOP_SC(p, td, S_PT_SCE);

		error = (*callp->sy_call)(td, argp);

		CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p,
		    error, syscallnames[code], td->td_retval[0],
		    td->td_retval[1]);
	}

	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	switch (error) {
	case 0:
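		/*
		 * Success: return values go in %o0/%o1 and the condition
		 * code carry bit is cleared; the userland syscall stubs
		 * test the carry bit to detect an error return.
		 */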
		tf->tf_out[0] = td->td_retval[0];
		tf->tf_out[1] = td->td_retval[1];
		tf->tf_tstate &= ~TSTATE_XCC_C;
		break;

	case ERESTART:
		/*
		 * Undo the tpc advancement we have done above; we want to
		 * reexecute the system call.
		 */
		tf->tf_tpc = tpc;
		tf->tf_tnpc -= 4;
		break;

	case EJUSTRETURN:
		break;

	default:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		tf->tf_out[0] = error;
		tf->tf_tstate |= TSTATE_XCC_C;
		break;
	}

	/*
	 * Release Giant if we had to get it.  Don't use mtx_owned();
	 * we want to catch broken syscalls.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_unlock(&Giant);

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(td, tf, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif
	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	PTRACESTOP_SC(p, td, S_PT_SCX);

#ifdef DIAGNOSTIC
	cred_free_thread(td);
#endif
	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
	mtx_assert(&sched_lock, MA_NOTOWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
}