trap.c revision 82585
1/*-
2 * Copyright (c) 2001, Jake Burkholder
3 * Copyright (C) 1994, David Greenman
4 * Copyright (c) 1990, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the University of Utah, and William Jolitz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed by the University of
21 *      California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
39 * 	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
40 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 82585 2001-08-30 18:50:57Z dillon $
41 */
42
43#include "opt_ddb.h"
44
45#include <sys/param.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/systm.h>
49#include <sys/pioctl.h>
50#include <sys/proc.h>
51#include <sys/syscall.h>
52#include <sys/sysent.h>
53#include <sys/user.h>
54#include <sys/vmmeter.h>
55
56#include <vm/vm.h>
57#include <vm/pmap.h>
58#include <vm/vm_extern.h>
59#include <vm/vm_param.h>
60#include <vm/vm_kern.h>
61#include <vm/vm_map.h>
62#include <vm/vm_page.h>
63
64#include <machine/clock.h>
65#include <machine/frame.h>
66#include <machine/intr_machdep.h>
67#include <machine/pcb.h>
68#include <machine/pv.h>
69#include <machine/trap.h>
70#include <machine/tstate.h>
71#include <machine/tte.h>
72#include <machine/tlb.h>
73#include <machine/tsb.h>
74#include <machine/watch.h>
75
76void trap(struct trapframe *tf);
77int trap_mmu_fault(struct proc *p, struct trapframe *tf);
78void syscall(struct proc *p, struct trapframe *tf, u_int sticks);
79
/*
 * Human-readable trap names, indexed by trap type number (the T_KERNEL
 * bit is masked off before indexing).  Used by the panic message in
 * trap() for traps that have no handler case.
 */
const char *trap_msg[] = {
	"reserved",
	"power on reset",
	"watchdog reset",
	"externally initiated reset",
	"software initiated reset",
	"red state exception",
	"instruction access exception",
	"instruction access error",
	"illegal instruction",
	"privileged opcode",
	"floating point disabled",
	"floating point exception ieee 754",
	"floating point exception other",
	"tag overflow",
	"division by zero",
	"data access exception",
	"data access error",
	"memory address not aligned",
	"lddf memory address not aligned",
	"stdf memory address not aligned",
	"privileged action",
	"interrupt vector",
	"physical address watchpoint",
	"virtual address watchpoint",
	"corrected ecc error",
	"fast instruction access mmu miss",
	"fast data access mmu miss",
	"fast data access protection",
	"clock",
	"bad spill",
	"bad fill",
	"breakpoint",
	"syscall",
};
115
/*
 * Central trap handler, called from the trap table glue with a filled-in
 * trapframe.  User-mode traps (T_KERNEL clear in the type) are resolved,
 * turned into a signal, or cause a panic; they leave through userret().
 * Kernel-mode traps (T_KERNEL set) either resume the kernel or panic.
 */
void
trap(struct trapframe *tf)
{
	u_int sticks;
	struct proc *p;
	int error;
	int ucode;
	int type;
	int sig;
	int mask;

	KASSERT(PCPU_GET(curproc) != NULL, ("trap: curproc NULL"));
	KASSERT(PCPU_GET(curpcb) != NULL, ("trap: curpcb NULL"));

	p = PCPU_GET(curproc);
	type = T_TYPE(tf->tf_type);
	ucode = type;	/* XXX */

	/*
	 * sticks is only consumed on the user-trap exit paths (userret and
	 * syscall); kernel traps never reach those, so it is left unset
	 * for them.
	 */
	if ((type & T_KERNEL) == 0)
		sticks = p->p_sticks;

	switch (type) {
	case T_FP_DISABLED:
		/* Try to grant FPU access; deliver SIGFPE if that fails. */
		if (fp_enable_proc(p))
			goto user;
		else {
			sig = SIGFPE;
			goto trapsig;
		}
		break;
	case T_IMMU_MISS:
	case T_DMMU_MISS:
	case T_DMMU_PROT:
		/* User-mode MMU fault; the VM paths still require Giant. */
		mtx_lock(&Giant);
		error = trap_mmu_fault(p, tf);
		mtx_unlock(&Giant);
		if (error == 0)
			goto user;
		break;
	case T_INTR:
		intr_dispatch(T_LEVEL(tf->tf_type), tf);
		goto user;
	case T_SYSCALL:
		/* syscall() calls userret(), so we need goto out; */
		syscall(p, tf, sticks);
		goto out;
#ifdef DDB
	case T_BREAKPOINT | T_KERNEL:
		if (kdb_trap(tf) != 0)
			goto out;
		break;
#endif
	case T_WATCH_VIRT | T_KERNEL:
		/*
		 * At the moment, just print the information from the trap,
		 * remove the watchpoint, use evil magic to execute the
		 * instruction (we temporarily save the instruction at
		 * %tnpc, write a trap instruction, resume, and reset the
		 * watch point when the trap arrives).
		 * To make sure that no interrupt gets in between and creates
		 * a potentially large window where the watchpoint is inactive,
		 * disable interrupts temporarily.
		 * This is obviously fragile and evilish.
		 */
		printf("Virtual watchpoint triggered, tpc=0x%lx, tnpc=0x%lx\n",
		    tf->tf_tpc, tf->tf_tnpc);
		PCPU_SET(wp_pstate, (tf->tf_tstate & TSTATE_PSTATE_MASK) >>
		    TSTATE_PSTATE_SHIFT);
		tf->tf_tstate &= ~TSTATE_IE;
		wrpr(pstate, rdpr(pstate), PSTATE_IE);
		PCPU_SET(wp_insn, *((u_int *)tf->tf_tnpc));
		*((u_int *)tf->tf_tnpc) = 0x91d03002;	/* ta %xcc, 2 */
		flush(tf->tf_tnpc);
		PCPU_SET(wp_va, watch_virt_get(&mask));
		PCPU_SET(wp_mask, mask);
		watch_virt_clear();
		goto out;
	case T_RESTOREWP | T_KERNEL:
		/*
		 * Undo the tweaks done for T_WATCH, reset the watch point and
		 * continue execution.
		 * Note that here, we run with interrupts enabled, so there
		 * is a small chance that we will be interrupted before we
		 * could reset the watch point.
		 */
		tf->tf_tstate = (tf->tf_tstate & ~TSTATE_PSTATE_MASK) |
		    PCPU_GET(wp_pstate) << TSTATE_PSTATE_SHIFT;
		watch_virt_set_mask(PCPU_GET(wp_va), PCPU_GET(wp_mask));
		*(u_int *)tf->tf_tpc = PCPU_GET(wp_insn);
		flush(tf->tf_tpc);
		goto out;
	case T_DMMU_MISS | T_KERNEL:
	case T_DMMU_PROT | T_KERNEL:
		/* Kernel-mode MMU fault (soft fault or copyin/copyout). */
		mtx_lock(&Giant);
		error = trap_mmu_fault(p, tf);
		mtx_unlock(&Giant);
		if (error == 0)
			goto out;
		break;
	case T_INTR | T_KERNEL:
		intr_dispatch(T_LEVEL(tf->tf_type), tf);
		goto out;
	default:
		break;
	}
	/* Unhandled trap, or a handler above failed fatally. */
	panic("trap: %s", trap_msg[type & ~T_KERNEL]);

trapsig:
	/* Translate fault for emulators. */
	if (p->p_sysent->sv_transtrap != NULL)
		sig = (p->p_sysent->sv_transtrap)(sig, type);
	trapsignal(p, sig, ucode);
user:
	userret(p, tf, sticks);
	if (mtx_owned(&Giant))
		mtx_unlock(&Giant);
out:
	return;
}
235
236int
237trap_mmu_fault(struct proc *p, struct trapframe *tf)
238{
239	struct mmuframe *mf;
240	struct vmspace *vm;
241	vm_offset_t va;
242	vm_prot_t type;
243	int rv;
244
245	KASSERT(p->p_vmspace != NULL, ("trap_dmmu_miss: vmspace NULL"));
246
247	type = 0;
248	rv = KERN_FAILURE;
249	mf = tf->tf_arg;
250	va = TLB_TAR_VA(mf->mf_tar);
251	switch (tf->tf_type) {
252	case T_DMMU_MISS | T_KERNEL:
253		/*
254		 * If the context is nucleus this is a soft fault on kernel
255		 * memory, just fault in the pages.
256		 */
257		if (TLB_TAR_CTX(mf->mf_tar) == TLB_CTX_KERNEL) {
258			rv = vm_fault(kernel_map, va, VM_PROT_READ,
259			    VM_FAULT_NORMAL);
260			break;
261		}
262
263		/*
264		 * Don't allow kernel mode faults on user memory unless
265		 * pcb_onfault is set.
266		 */
267		if (PCPU_GET(curpcb)->pcb_onfault == NULL)
268			break;
269		/* Fallthrough. */
270	case T_IMMU_MISS:
271	case T_DMMU_MISS:
272		/*
273		 * First try the tsb.  The primary tsb was already searched.
274		 */
275		vm = p->p_vmspace;
276		if (tsb_miss(&vm->vm_pmap, tf->tf_type, mf) == 0) {
277			rv = KERN_SUCCESS;
278			break;
279		}
280
281		/*
282		 * Not found, call the vm system.
283		 */
284
285		if (tf->tf_type == T_IMMU_MISS)
286			type = VM_PROT_EXECUTE | VM_PROT_READ;
287		else
288			type = VM_PROT_READ;
289
290		/*
291		 * Keep the process from being swapped out at this critical
292		 * time.
293		 */
294		PROC_LOCK(p);
295		++p->p_lock;
296		PROC_UNLOCK(p);
297
298		/*
299		 * Grow the stack if necessary.  vm_map_growstack only fails
300		 * if the va falls into a growable stack region and the stack
301		 * growth fails.  If it succeeds, or the va was not within a
302		 * growable stack region, fault in the user page.
303		 */
304		if (vm_map_growstack(p, va) != KERN_SUCCESS)
305			rv = KERN_FAILURE;
306		else
307			rv = vm_fault(&vm->vm_map, va, type, VM_FAULT_NORMAL);
308
309		/*
310		 * Now the process can be swapped again.
311		 */
312		PROC_LOCK(p);
313		--p->p_lock;
314		PROC_UNLOCK(p);
315		break;
316	case T_DMMU_PROT | T_KERNEL:
317		/*
318		 * Protection faults should not happen on kernel memory.
319		 */
320		if (TLB_TAR_CTX(mf->mf_tar) == TLB_CTX_KERNEL)
321			break;
322
323		/*
324		 * Don't allow kernel mode faults on user memory unless
325		 * pcb_onfault is set.
326		 */
327		if (PCPU_GET(curpcb)->pcb_onfault == NULL)
328			break;
329		/* Fallthrough. */
330	case T_DMMU_PROT:
331		/*
332		 * Only look in the tsb.  Write access to an unmapped page
333		 * causes a miss first, so the page must have already been
334		 * brought in by vm_fault, we just need to find the tte and
335		 * update the write bit.  XXX How do we tell them vm system
336		 * that we are now writing?
337		 */
338		vm = p->p_vmspace;
339		if (tsb_miss(&vm->vm_pmap, tf->tf_type, mf) == 0)
340			rv = KERN_SUCCESS;
341		break;
342	default:
343		break;
344	}
345	if (rv == KERN_SUCCESS)
346		return (0);
347	if (tf->tf_type & T_KERNEL) {
348		if (PCPU_GET(curpcb)->pcb_onfault != NULL &&
349		    TLB_TAR_CTX(mf->mf_tar) != TLB_CTX_KERNEL) {
350			tf->tf_tpc = (u_long)PCPU_GET(curpcb)->pcb_onfault;
351			tf->tf_tnpc = tf->tf_tpc + 4;
352			return (0);
353		}
354	}
355	return (rv == KERN_PROTECTION_FAILURE ? SIGBUS : SIGSEGV);
356}
357
358/* Maximum number of arguments that can be passed via the out registers. */
359#define	REG_MAXARGS	6
360
361/*
362 * Syscall handler. The arguments to the syscall are passed in the o registers
363 * by the caller, and are saved in the trap frame. The syscall number is passed
364 * in %g1 (and also saved in the trap frame).
365 */
366void
367syscall(struct proc *p, struct trapframe *tf, u_int sticks)
368{
369	struct sysent *callp;
370	u_long code;
371	u_long tpc;
372	int reg;
373	int regcnt;
374	int narg;
375	int error;
376	register_t args[8];
377	void *argp;
378
379	narg = 0;
380	error = 0;
381	reg = 0;
382	regcnt = REG_MAXARGS;
383	code = tf->tf_global[1];
384	atomic_add_int(&cnt.v_syscall, 1);
385	/*
386	 * For syscalls, we don't want to retry the faulting instruction
387	 * (usually), instead we need to advance one instruction.
388	 */
389	tpc = tf->tf_tpc;
390	tf->tf_tpc = tf->tf_tnpc;
391	tf->tf_tnpc += 4;
392
393	if (p->p_sysent->sv_prepsyscall) {
394		/*
395		 * The prep code is MP aware.
396		 */
397#if 0
398		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
399#endif
400	} else 	if (code == SYS_syscall || code == SYS___syscall) {
401		code = tf->tf_out[reg++];
402		regcnt--;
403	}
404
405 	if (p->p_sysent->sv_mask)
406 		code &= p->p_sysent->sv_mask;
407
408 	if (code >= p->p_sysent->sv_size)
409 		callp = &p->p_sysent->sv_table[0];
410  	else
411 		callp = &p->p_sysent->sv_table[code];
412
413	narg = callp->sy_narg & SYF_ARGMASK;
414
415	if (narg <= regcnt)
416		argp = &tf->tf_out[reg];
417	else {
418		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
419		    ("Too many syscall arguments!"));
420		argp = args;
421		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
422		error = copyin((void *)(tf->tf_out[6] + SPOFF +
423		    offsetof(struct frame, f_pad[6])),
424		    &args[reg + regcnt], (narg - regcnt) * sizeof(args[0]));
425		if (error != 0)
426			goto bad;
427	}
428
429	/*
430	 * Try to run the syscall without the MP lock if the syscall
431	 * is MP safe.
432	 */
433	if ((callp->sy_narg & SYF_MPSAFE) == 0)
434		mtx_lock(&Giant);
435
436#ifdef KTRACE
437	/*
438	 * We have to obtain the MP lock no matter what if
439	 * we are ktracing
440	 */
441	if (KTRPOINT(p, KTR_SYSCALL)) {
442		ktrsyscall(p->p_tracep, code, narg, args);
443	}
444#endif
445	p->p_retval[0] = 0;
446	p->p_retval[1] = tf->tf_out[1];
447
448	STOPEVENT(p, S_SCE, narg);	/* MP aware */
449
450	error = (*callp->sy_call)(p, argp);
451
452	/*
453	 * MP SAFE (we may or may not have the MP lock at this point)
454	 */
455	switch (error) {
456	case 0:
457		tf->tf_out[0] = p->p_retval[0];
458		tf->tf_out[1] = p->p_retval[1];
459		tf->tf_tstate &= ~TSTATE_XCC_C;
460		break;
461
462	case ERESTART:
463		/*
464		 * Undo the tpc advancement we have done above, we want to
465		 * reexecute the system call.
466		 */
467		tf->tf_tpc = tpc;
468		tf->tf_tnpc -= 4;
469		break;
470
471	case EJUSTRETURN:
472		break;
473
474	default:
475bad:
476 		if (p->p_sysent->sv_errsize) {
477 			if (error >= p->p_sysent->sv_errsize)
478  				error = -1;	/* XXX */
479   			else
480  				error = p->p_sysent->sv_errtbl[error];
481		}
482		tf->tf_out[0] = error;
483		tf->tf_tstate |= TSTATE_XCC_C;
484		break;
485	}
486
487	/*
488	 * Handle reschedule and other end-of-syscall issues
489	 */
490	userret(p, tf, sticks);
491
492#ifdef KTRACE
493	if (KTRPOINT(p, KTR_SYSRET)) {
494		ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
495	}
496#endif
497
498	/*
499	 * Release Giant if we had to get it.  Don't use mtx_owned(),
500	 * we want to catch broken syscalls.
501	 */
502	if ((callp->sy_narg & SYF_MPSAFE) == 0)
503		mtx_unlock(&Giant);
504
505	/*
506	 * This works because errno is findable through the
507	 * register set.  If we ever support an emulation where this
508	 * is not the case, this code will need to be revisited.
509	 */
510	STOPEVENT(p, S_SCX, code);
511
512#ifdef WITNESS
513	if (witness_list(p)) {
514		panic("system call %s returning with mutex(s) held\n",
515		    syscallnames[code]);
516	}
517#endif
518	mtx_assert(&sched_lock, MA_NOTOWNED);
519	mtx_assert(&Giant, MA_NOTOWNED);
520
521}
522