/*-
 * Copyright (c) 2001, Jake Burkholder
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
 *	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 116659 2003-06-22 01:26:08Z jmg $
 */

#include "opt_ddb.h"
#include "opt_ktr.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/trap.h>
#include <machine/tstate.h>
#include <machine/tte.h>
#include <machine/tlb.h>
#include <machine/tsb.h>
#include <machine/watch.h>

void trap(struct trapframe *tf);
void syscall(struct trapframe *tf);

static int trap_pfault(struct thread *td, struct trapframe *tf);

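/*
 * Recovery labels exported by the assembler copy and fetch/store
 * primitives (support.S).  Each *_nofault_begin/*_nofault_end pair
 * brackets the instructions that are allowed to fault; when a trap is
 * taken inside one of these ranges, the handlers below redirect %tpc to
 * the matching *_fault label so the primitive returns an error instead
 * of panicking the kernel.
 */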
extern char copy_fault[];
extern char copy_nofault_begin[];
extern char copy_nofault_end[];

extern char fs_fault[];
extern char fs_nofault_begin[];
extern char fs_nofault_end[];
extern char fs_nofault_intr_begin[];
extern char fs_nofault_intr_end[];

extern char fas_fault[];
extern char fas_nofault_begin[];
extern char fas_nofault_end[];

extern char *syscallnames[];

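/*
 * Trap names and default signals, indexed by trap type (the T_* values
 * from machine/trap.h, with T_KERNEL masked off).  An entry of -1 in
 * trap_sig[] marks a trap that is never translated into a signal;
 * reaching one of those in the default case below is a panic.
 */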
const char *trap_msg[] = {
	"reserved",
	"instruction access exception",
	"instruction access error",
	"instruction access protection",
	"illtrap instruction",
	"illegal instruction",
	"privileged opcode",
	"floating point disabled",
	"floating point exception ieee 754",
	"floating point exception other",
	"tag overflow",
	"division by zero",
	"data access exception",
	"data access error",
	"data access protection",
	"memory address not aligned",
	"privileged action",
	"async data error",
	"trap instruction 16",
	"trap instruction 17",
	"trap instruction 18",
	"trap instruction 19",
	"trap instruction 20",
	"trap instruction 21",
	"trap instruction 22",
	"trap instruction 23",
	"trap instruction 24",
	"trap instruction 25",
	"trap instruction 26",
	"trap instruction 27",
	"trap instruction 28",
	"trap instruction 29",
	"trap instruction 30",
	"trap instruction 31",
	"fast instruction access mmu miss",
	"fast data access mmu miss",
	"interrupt",
	"physical address watchpoint",
	"virtual address watchpoint",
	"corrected ecc error",
	"spill",
	"fill",
	"fill",
	"breakpoint",
	"clean window",
	"range check",
	"fix alignment",
	"integer overflow",
	"syscall",
	"restore physical watchpoint",
	"restore virtual watchpoint",
	"kernel stack fault",
};

const int trap_sig[] = {
	SIGILL,			/* reserved */
	SIGILL,			/* instruction access exception */
	SIGILL,			/* instruction access error */
	SIGILL,			/* instruction access protection */
	SIGILL,			/* illtrap instruction */
	SIGILL,			/* illegal instruction */
	SIGBUS,			/* privileged opcode */
	SIGFPE,			/* floating point disabled */
	SIGFPE,			/* floating point exception ieee 754 */
	SIGFPE,			/* floating point exception other */
	SIGEMT,			/* tag overflow */
	SIGFPE,			/* division by zero */
	SIGILL,			/* data access exception */
	SIGILL,			/* data access error */
	SIGBUS,			/* data access protection */
	SIGBUS,			/* memory address not aligned */
	SIGBUS,			/* privileged action */
	SIGBUS,			/* async data error */
	SIGILL,			/* trap instruction 16 */
	SIGILL,			/* trap instruction 17 */
	SIGILL,			/* trap instruction 18 */
	SIGILL,			/* trap instruction 19 */
	SIGILL,			/* trap instruction 20 */
	SIGILL,			/* trap instruction 21 */
	SIGILL,			/* trap instruction 22 */
	SIGILL,			/* trap instruction 23 */
	SIGILL,			/* trap instruction 24 */
	SIGILL,			/* trap instruction 25 */
	SIGILL,			/* trap instruction 26 */
	SIGILL,			/* trap instruction 27 */
	SIGILL,			/* trap instruction 28 */
	SIGILL,			/* trap instruction 29 */
	SIGILL,			/* trap instruction 30 */
	SIGILL,			/* trap instruction 31 */
	SIGSEGV,		/* fast instruction access mmu miss */
	SIGSEGV,		/* fast data access mmu miss */
	-1,			/* interrupt */
	-1,			/* physical address watchpoint */
	-1,			/* virtual address watchpoint */
	-1,			/* corrected ecc error */
	SIGILL,			/* spill */
	SIGILL,			/* fill */
	SIGILL,			/* fill */
	SIGTRAP,		/* breakpoint */
	SIGILL,			/* clean window */
	SIGILL,			/* range check */
	SIGILL,			/* fix alignment */
	SIGILL,			/* integer overflow */
	SIGSYS,			/* syscall */
	-1,			/* restore physical watchpoint */
	-1,			/* restore virtual watchpoint */
	-1,			/* kernel stack fault */
};

CTASSERT(sizeof(struct trapframe) == 256);

int debugger_on_signal = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW,
    &debugger_on_signal, 0, "");

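/*
 * Common trap entry point, called with a populated trap frame.  Traps
 * taken from user mode are resolved here if possible (page faults,
 * register window spill/fill) and otherwise delivered to the process as
 * a signal; traps taken from kernel mode must either be handled here
 * (debugger traps, recoverable faults in the copy/fetch primitives) or
 * the machine panics.
 */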
void
trap(struct trapframe *tf)
{
	struct thread *td;
	struct proc *p;
	u_int sticks;
	int error;
	int sig;

	td = PCPU_GET(curthread);

	CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td,
	    trap_msg[tf->tf_type & ~T_KERNEL],
	    (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil));

	atomic_add_int(&cnt.v_trap, 1);

	if ((tf->tf_tstate & TSTATE_PRIV) == 0) {
		KASSERT(td != NULL, ("trap: curthread NULL"));
		KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));

		p = td->td_proc;
		sticks = td->td_sticks;
		td->td_frame = tf;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);

		switch (tf->tf_type) {
		case T_DATA_MISS:
		case T_DATA_PROTECTION:
		case T_INSTRUCTION_MISS:
			sig = trap_pfault(td, tf);
			break;
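		/*
		 * Spill and fill traps arrive here when a user register
		 * window could not be saved to or restored from the user
		 * stack directly; rwindow_save()/rwindow_load() perform
		 * the transfer from the kernel and return a signal number
		 * on failure.
		 */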
		case T_FILL:
			sig = rwindow_load(td, tf, 2);
			break;
		case T_FILL_RET:
			sig = rwindow_load(td, tf, 1);
			break;
		case T_SPILL:
			sig = rwindow_save(td);
			break;
		default:
			if (tf->tf_type < 0 || tf->tf_type >= T_MAX ||
			    trap_sig[tf->tf_type] == -1)
				panic("trap: bad trap type");
			sig = trap_sig[tf->tf_type];
			break;
		}

		if (sig != 0) {
			/* Translate fault for emulators. */
			if (p->p_sysent->sv_transtrap != NULL) {
				sig = p->p_sysent->sv_transtrap(sig,
				    tf->tf_type);
			}
			/* SIGILL, SIGBUS or SIGSEGV */
			if (debugger_on_signal &&
			    (sig == 4 || sig == 10 || sig == 11))
				Debugger("trapsig");
			trapsignal(td, sig, tf->tf_type);
		}

		userret(td, tf, sticks);
		mtx_assert(&Giant, MA_NOTOWNED);
#ifdef DIAGNOSTIC
		cred_free_thread(td);
#endif
	} else {
		KASSERT((tf->tf_type & T_KERNEL) != 0,
		    ("trap: kernel trap isn't"));

		switch (tf->tf_type & ~T_KERNEL) {
#ifdef DDB
		case T_BREAKPOINT:
		case T_KSTACK_FAULT:
			error = (kdb_trap(tf) == 0);
			break;
#ifdef notyet
		case T_PA_WATCHPOINT:
		case T_VA_WATCHPOINT:
			error = db_watch_trap(tf);
			break;
#endif
#endif
		case T_DATA_MISS:
		case T_DATA_PROTECTION:
		case T_INSTRUCTION_MISS:
			error = trap_pfault(td, tf);
			break;
		case T_DATA_EXCEPTION:
		case T_MEM_ADDRESS_NOT_ALIGNED:
			if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 &&
			    MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) {
				if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
				    tf->tf_tpc <= (u_long)copy_nofault_end) {
					tf->tf_tpc = (u_long)copy_fault;
					tf->tf_tnpc = tf->tf_tpc + 4;
					error = 0;
					break;
				}
				if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
				    tf->tf_tpc <= (u_long)fs_nofault_end) {
					tf->tf_tpc = (u_long)fs_fault;
					tf->tf_tnpc = tf->tf_tpc + 4;
					error = 0;
					break;
				}
			}
			error = 1;
			break;
		case T_DATA_ERROR:
			/*
			 * Handle PCI poke/peek as per UltraSPARC IIi
			 * User's Manual 16.2.1.
			 *
			 * XXX - We really should make sure that tpc is
			 * pointing to the membar #Sync we are expecting.
			 */
#define MEMBARSYNC_INST	((u_int32_t)0x8143e040)
			if (tf->tf_tpc > (u_long)fas_nofault_begin &&
			    tf->tf_tpc < (u_long)fas_nofault_end &&
			    *(u_int32_t *)tf->tf_tpc == MEMBARSYNC_INST &&
			    ((u_int32_t *)tf->tf_tpc)[-2] == MEMBARSYNC_INST) {
				tf->tf_tpc = (u_long)fas_fault;
				tf->tf_tnpc = tf->tf_tpc + 4;
				error = 0;
				break;
			}
#undef MEMBARSYNC_INST
			error = 1;
			break;
		default:
			error = 1;
			break;
		}

		if (error != 0)
			panic("trap: %s", trap_msg[tf->tf_type & ~T_KERNEL]);
	}
	CTR1(KTR_TRAP, "trap: td=%p return", td);
}

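/*
 * Handle a page fault (data/instruction MMU miss or data protection trap)
 * by calling into the VM system.  Returns 0 if the fault was resolved or
 * could be recovered by redirecting %tpc to a fault handler, a signal
 * number (SIGSEGV or SIGBUS) if it could not, and -1 if there is no
 * current thread to fault on behalf of.
 */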
static int
trap_pfault(struct thread *td, struct trapframe *tf)
{
	struct vmspace *vm;
	struct pcb *pcb;
	struct proc *p;
	vm_offset_t va;
	vm_prot_t prot;
	u_long ctx;
	int flags;
	int type;
	int rv;

	if (td == NULL)
		return (-1);
	KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL"));
	KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL"));
	KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL"));

	p = td->td_proc;

	rv = KERN_SUCCESS;
	ctx = TLB_TAR_CTX(tf->tf_tar);
	pcb = td->td_pcb;
	type = tf->tf_type & ~T_KERNEL;
	va = TLB_TAR_VA(tf->tf_tar);

	CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx",
	    td, p->p_vmspace->vm_pmap.pm_context[PCPU_GET(cpuid)], va, ctx);

	if (type == T_DATA_PROTECTION) {
		prot = VM_PROT_WRITE;
		flags = VM_FAULT_DIRTY;
	} else {
		if (type == T_DATA_MISS)
			prot = VM_PROT_READ;
		else
			prot = VM_PROT_READ | VM_PROT_EXECUTE;
		flags = VM_FAULT_NORMAL;
	}

	if (ctx != TLB_CTX_KERNEL) {
		if ((tf->tf_tstate & TSTATE_PRIV) != 0 &&
		    (tf->tf_tpc >= (u_long)fs_nofault_intr_begin &&
		     tf->tf_tpc <= (u_long)fs_nofault_intr_end)) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 */
		vm = p->p_vmspace;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);

		/* Fault in the user page. */
		rv = vm_fault(&vm->vm_map, va, prot, flags);

		/*
		 * Now the process can be swapped again.
		 */
		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	} else {
		/*
		 * This is a fault on kernel virtual memory.  Attempts to
		 * access kernel memory from user mode cause privileged
		 * action traps, not page faults.
		 */
		KASSERT(tf->tf_tstate & TSTATE_PRIV,
		    ("trap_pfault: fault on nucleus context from user mode"));

		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
	}

	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
	    td, va, rv);
	if (rv == KERN_SUCCESS)
		return (0);
	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
		    tf->tf_tpc <= (u_long)fs_nofault_end) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
		    tf->tf_tpc <= (u_long)copy_nofault_end) {
			tf->tf_tpc = (u_long)copy_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
	}
	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/* Maximum number of arguments that can be passed via the out registers. */
#define	REG_MAXARGS	6

/*
 * Syscall handler.  The arguments to the syscall are passed in the out
 * registers by the caller, and are saved in the trap frame.  The syscall
 * number is passed in %g1 (and also saved in the trap frame).
 */
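/*
 * Up to REG_MAXARGS (six) arguments arrive in %o0-%o5 and are picked up
 * from tf_out[]; any additional arguments are copied in from the caller's
 * stack frame.  For the indirect syscalls SYS_syscall/SYS___syscall the
 * real syscall number is taken from %o0 and the remaining arguments shift
 * down by one register.
 */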
void
syscall(struct trapframe *tf)
{
	struct sysent *callp;
	struct thread *td;
	register_t args[8];
	register_t *argp;
	struct proc *p;
	u_int sticks;
	u_long code;
	u_long tpc;
	int reg;
	int regcnt;
	int narg;
	int error;

	td = PCPU_GET(curthread);
	KASSERT(td != NULL, ("trap: curthread NULL"));
	KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));

	p = td->td_proc;

	atomic_add_int(&cnt.v_syscall, 1);

	narg = 0;
	error = 0;
	reg = 0;
	regcnt = REG_MAXARGS;

	sticks = td->td_sticks;
	td->td_frame = tf;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_SA)
		thread_user_enter(p, td);
	code = tf->tf_global[1];

	/*
	 * For syscalls we don't want to retry the trapping instruction
	 * (usually); instead we advance past it before dispatching the
	 * call.
	 */
	tpc = tf->tf_tpc;
	TF_DONE(tf);

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is MP aware.
		 */
#if 0
		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
#endif
	} else if (code == SYS_syscall || code == SYS___syscall) {
		code = tf->tf_out[reg++];
		regcnt--;
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

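	/*
	 * Gather the argument vector: the first regcnt arguments come from
	 * the out registers saved in the trap frame, the remainder (if any)
	 * are copied in from the argument slots of the caller's stack frame.
	 */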
	narg = callp->sy_narg & SYF_ARGMASK;

	if (narg <= regcnt) {
		argp = &tf->tf_out[reg];
		error = 0;
	} else {
		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
		    ("Too many syscall arguments!"));
		argp = args;
		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
		error = copyin((void *)(tf->tf_out[6] + SPOFF +
		    offsetof(struct frame, fr_pad[6])),
		    &args[regcnt], (narg - regcnt) * sizeof(args[0]));
	}

	CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td,
	    syscallnames[code], argp[0], argp[1], argp[2]);

	/*
	 * Try to run the syscall without the MP lock if the syscall
	 * is MP safe.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_lock(&Giant);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, narg, argp);
#endif
	if (error == 0) {
		td->td_retval[0] = 0;
		td->td_retval[1] = 0;

		STOPEVENT(p, S_SCE, narg);	/* MP aware */

		error = (*callp->sy_call)(td, argp);

		CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p,
		    error, syscallnames[code], td->td_retval[0],
		    td->td_retval[1]);
	}

	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
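	/*
	 * Return values are handed back in %o0/%o1 with the condition code
	 * carry bit clear; on error the carry bit is set and %o0 holds the
	 * (possibly translated) errno value.  ERESTART rewinds %tpc/%tnpc
	 * so the trap instruction is reexecuted.
	 */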
	switch (error) {
	case 0:
		tf->tf_out[0] = td->td_retval[0];
		tf->tf_out[1] = td->td_retval[1];
		tf->tf_tstate &= ~TSTATE_XCC_C;
		break;

	case ERESTART:
		/*
		 * Undo the tpc advancement we have done above; we want to
		 * reexecute the system call.
		 */
		tf->tf_tpc = tpc;
		tf->tf_tnpc -= 4;
		break;

	case EJUSTRETURN:
		break;

	default:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		tf->tf_out[0] = error;
		tf->tf_tstate |= TSTATE_XCC_C;
		break;
	}

	/*
	 * Release Giant if we had to get it.  Don't use mtx_owned();
	 * we want to catch broken syscalls.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_unlock(&Giant);

	/*
	 * Handle reschedule and other end-of-syscall issues.
	 */
	userret(td, tf, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif
	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

#ifdef DIAGNOSTIC
	cred_free_thread(td);
#endif
	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
	mtx_assert(&sched_lock, MA_NOTOWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
}
656