trap.c revision 82006
1/*-
2 * Copyright (c) 2001, Jake Burkholder
3 * Copyright (C) 1994, David Greenman
4 * Copyright (c) 1990, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the University of Utah, and William Jolitz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed by the University of
21 *      California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
39 * 	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
40 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 82006 2001-08-20 23:43:43Z jake $
41 */
42
43#include "opt_ddb.h"
44
45#include <sys/param.h>
46#include <sys/lock.h>
47#include <sys/mutex.h>
48#include <sys/systm.h>
49#include <sys/pioctl.h>
50#include <sys/proc.h>
51#include <sys/syscall.h>
52#include <sys/sysent.h>
53#include <sys/user.h>
54#include <sys/vmmeter.h>
55
56#include <vm/vm.h>
57#include <vm/pmap.h>
58#include <vm/vm_extern.h>
59#include <vm/vm_param.h>
60#include <vm/vm_kern.h>
61#include <vm/vm_map.h>
62#include <vm/vm_page.h>
63
64#include <machine/clock.h>
65#include <machine/frame.h>
66#include <machine/intr_machdep.h>
67#include <machine/pcb.h>
68#include <machine/pv.h>
69#include <machine/trap.h>
70#include <machine/tstate.h>
71#include <machine/tte.h>
72#include <machine/tlb.h>
73#include <machine/tsb.h>
74#include <machine/watch.h>
75
76void trap(struct trapframe *tf);
77int trap_mmu_fault(struct proc *p, struct trapframe *tf);
78void syscall(struct proc *p, struct trapframe *tf, u_int sticks);
79
/*
 * Human-readable trap names, indexed by trap type code with T_KERNEL
 * masked off (see the panic() in trap() below).  The order must match
 * the T_* trap type constants in machine/trap.h.
 */
const char *trap_msg[] = {
	"reserved",
	"power on reset",
	"watchdog reset",
	"externally initiated reset",
	"software initiated reset",
	"red state exception",
	"instruction access exception",
	"instruction access error",
	"illegal instruction",
	"privileged opcode",
	"floating point disabled",
	"floating point exception ieee 754",
	"floating point exception other",
	"tag overflow",
	"division by zero",
	"data access exception",
	"data access error",
	"memory address not aligned",
	"lddf memory address not aligned",
	"stdf memory address not aligned",
	"privileged action",
	"interrupt vector",
	"physical address watchpoint",
	"virtual address watchpoint",
	"corrected ecc error",
	"fast instruction access mmu miss",
	"fast data access mmu miss",
	"fast data access protection",
	"clock",
	"bad spill",
	"bad fill",
	"breakpoint",
	"syscall",
};
115
/*
 * Central C-level trap handler.  Decodes the trap type from the trap
 * frame and either services the trap (MMU faults, interrupts, kernel
 * watchpoints, breakpoints), dispatches a syscall, posts a signal to
 * the current process, or panics on anything it does not recognize.
 * User-mode traps leave through userret(); kernel-mode traps return
 * directly to the interrupted context.
 */
void
trap(struct trapframe *tf)
{
	u_int sticks;
	struct proc *p;
	int error;
	int ucode;
	int type;
	int sig;
	int mask;

	KASSERT(PCPU_GET(curproc) != NULL, ("trap: curproc NULL"));
	KASSERT(PCPU_GET(curpcb) != NULL, ("trap: curpcb NULL"));

	p = PCPU_GET(curproc);
	type = T_TYPE(tf->tf_type);
	ucode = type;	/* XXX */

	/*
	 * Snapshot the system-time tick count for user-mode traps; it is
	 * handed to userret()/syscall() on the way out.  Kernel-mode traps
	 * (T_KERNEL set) never reach the "user" label, so sticks is
	 * deliberately left unset for them.
	 */
	if ((type & T_KERNEL) == 0)
		sticks = p->p_sticks;

	switch (type) {
	case T_FP_DISABLED:
		/* Try to enable the FPU for this process; SIGFPE if not. */
		if (fp_enable_proc(p))
			goto user;
		else {
			sig = SIGFPE;
			goto trapsig;
		}
		break;
	case T_IMMU_MISS:
	case T_DMMU_MISS:
	case T_DMMU_PROT:
		/* User-mode MMU fault: resolve via tsb/vm under Giant. */
		mtx_lock(&Giant);
		error = trap_mmu_fault(p, tf);
		mtx_unlock(&Giant);
		if (error == 0)
			goto user;
		/*
		 * NOTE(review): on failure trap_mmu_fault() returns the
		 * signal to post (SIGBUS/SIGSEGV), but this falls through
		 * the switch into panic() below instead of doing
		 * "sig = error; goto trapsig;" -- a user segfault will
		 * panic the kernel.  Presumably bringup-era behavior;
		 * verify before relying on it.
		 */
		break;
	case T_INTR:
		intr_dispatch(T_LEVEL(tf->tf_type), tf);
		goto user;
	case T_SYSCALL:
		/* syscall() calls userret(), so we need goto out; */
		syscall(p, tf, sticks);
		goto out;
#ifdef DDB
	case T_BREAKPOINT | T_KERNEL:
		/* Enter the kernel debugger; resume if it handled the trap. */
		if (kdb_trap(tf) != 0)
			goto out;
		break;
#endif
	case T_WATCH_VIRT | T_KERNEL:
		/*
		 * At the moment, just print the information from the trap,
		 * remove the watchpoint, use evil magic to execute the
		 * instruction (we temporarily save the instruction at
		 * %tnpc, write a trap instruction, resume, and reset the
		 * watch point when the trap arrives).
		 * To make sure that no interrupt gets in between and creates
		 * a potentially large window where the watchpoint is inactive,
		 * disable interrupts temporarily.
		 * This is obviously fragile and evilish.
		 */
		printf("Virtual watchpoint triggered, tpc=0x%lx, tnpc=0x%lx\n",
		    tf->tf_tpc, tf->tf_tnpc);
		/* Save the interrupted PSTATE so T_RESTOREWP can restore it. */
		PCPU_SET(wp_pstate, (tf->tf_tstate & TSTATE_PSTATE_MASK) >>
		    TSTATE_PSTATE_SHIFT);
		tf->tf_tstate &= ~TSTATE_IE;
		wrpr(pstate, rdpr(pstate), PSTATE_IE);
		/* Patch a "ta %xcc, 2" over the next instruction. */
		PCPU_SET(wp_insn, *((u_int *)tf->tf_tnpc));
		*((u_int *)tf->tf_tnpc) = 0x91d03002;	/* ta %xcc, 2 */
		flush(tf->tf_tnpc);
		PCPU_SET(wp_va, watch_virt_get(&mask));
		PCPU_SET(wp_mask, mask);
		watch_virt_clear();
		goto out;
	case T_RESTOREWP | T_KERNEL:
		/*
		 * Undo the tweaks done for T_WATCH, reset the watch point and
		 * continue execution.
		 * Note that here, we run with interrupts enabled, so there
		 * is a small chance that we will be interrupted before we
		 * could reset the watch point.
		 */
		tf->tf_tstate = (tf->tf_tstate & ~TSTATE_PSTATE_MASK) |
		    PCPU_GET(wp_pstate) << TSTATE_PSTATE_SHIFT;
		watch_virt_set_mask(PCPU_GET(wp_va), PCPU_GET(wp_mask));
		/* Put the saved instruction back and resume at it. */
		*(u_int *)tf->tf_tpc = PCPU_GET(wp_insn);
		flush(tf->tf_tpc);
		goto out;
	case T_DMMU_MISS | T_KERNEL:
	case T_DMMU_PROT | T_KERNEL:
		/*
		 * Kernel-mode MMU fault: recoverable via pcb_onfault or a
		 * soft fault on kernel memory; anything else panics below.
		 */
		mtx_lock(&Giant);
		error = trap_mmu_fault(p, tf);
		mtx_unlock(&Giant);
		if (error == 0)
			goto out;
		break;
	case T_INTR | T_KERNEL:
		intr_dispatch(T_LEVEL(tf->tf_type), tf);
		goto out;
	default:
		break;
	}
	panic("trap: %s", trap_msg[type & ~T_KERNEL]);

trapsig:
	mtx_lock(&Giant);
	/* Translate fault for emulators. */
	if (p->p_sysent->sv_transtrap != NULL)
		sig = (p->p_sysent->sv_transtrap)(sig, type);

	trapsignal(p, sig, ucode);
	mtx_unlock(&Giant);
user:
	userret(p, tf, sticks);
	if (mtx_owned(&Giant))
		mtx_unlock(&Giant);
out:
	return;
}
238
239int
240trap_mmu_fault(struct proc *p, struct trapframe *tf)
241{
242	struct mmuframe *mf;
243	struct vmspace *vm;
244	vm_offset_t va;
245	vm_prot_t type;
246	int rv;
247
248	KASSERT(p->p_vmspace != NULL, ("trap_dmmu_miss: vmspace NULL"));
249
250	type = 0;
251	rv = KERN_FAILURE;
252	mf = tf->tf_arg;
253	va = TLB_TAR_VA(mf->mf_tar);
254	switch (tf->tf_type) {
255	case T_DMMU_MISS | T_KERNEL:
256		/*
257		 * If the context is nucleus this is a soft fault on kernel
258		 * memory, just fault in the pages.
259		 */
260		if (TLB_TAR_CTX(mf->mf_tar) == TLB_CTX_KERNEL) {
261			rv = vm_fault(kernel_map, va, VM_PROT_READ,
262			    VM_FAULT_NORMAL);
263			break;
264		}
265
266		/*
267		 * Don't allow kernel mode faults on user memory unless
268		 * pcb_onfault is set.
269		 */
270		if (PCPU_GET(curpcb)->pcb_onfault == NULL)
271			break;
272		/* Fallthrough. */
273	case T_IMMU_MISS:
274	case T_DMMU_MISS:
275		/*
276		 * First try the tsb.  The primary tsb was already searched.
277		 */
278		vm = p->p_vmspace;
279		if (tsb_miss(&vm->vm_pmap, tf->tf_type, mf) == 0) {
280			rv = KERN_SUCCESS;
281			break;
282		}
283
284		/*
285		 * Not found, call the vm system.
286		 */
287
288		if (tf->tf_type == T_IMMU_MISS)
289			type = VM_PROT_EXECUTE | VM_PROT_READ;
290		else
291			type = VM_PROT_READ;
292
293		/*
294		 * Keep the process from being swapped out at this critical
295		 * time.
296		 */
297		PROC_LOCK(p);
298		++p->p_lock;
299		PROC_UNLOCK(p);
300
301		/*
302		 * Grow the stack if necessary.  vm_map_growstack only fails
303		 * if the va falls into a growable stack region and the stack
304		 * growth fails.  If it succeeds, or the va was not within a
305		 * growable stack region, fault in the user page.
306		 */
307		if (vm_map_growstack(p, va) != KERN_SUCCESS)
308			rv = KERN_FAILURE;
309		else
310			rv = vm_fault(&vm->vm_map, va, type, VM_FAULT_NORMAL);
311
312		/*
313		 * Now the process can be swapped again.
314		 */
315		PROC_LOCK(p);
316		--p->p_lock;
317		PROC_UNLOCK(p);
318		break;
319	case T_DMMU_PROT | T_KERNEL:
320		/*
321		 * Protection faults should not happen on kernel memory.
322		 */
323		if (TLB_TAR_CTX(mf->mf_tar) == TLB_CTX_KERNEL)
324			break;
325
326		/*
327		 * Don't allow kernel mode faults on user memory unless
328		 * pcb_onfault is set.
329		 */
330		if (PCPU_GET(curpcb)->pcb_onfault == NULL)
331			break;
332		/* Fallthrough. */
333	case T_DMMU_PROT:
334		/*
335		 * Only look in the tsb.  Write access to an unmapped page
336		 * causes a miss first, so the page must have already been
337		 * brought in by vm_fault, we just need to find the tte and
338		 * update the write bit.  XXX How do we tell them vm system
339		 * that we are now writing?
340		 */
341		vm = p->p_vmspace;
342		if (tsb_miss(&vm->vm_pmap, tf->tf_type, mf) == 0)
343			rv = KERN_SUCCESS;
344		break;
345	default:
346		break;
347	}
348	if (rv == KERN_SUCCESS)
349		return (0);
350	if (tf->tf_type & T_KERNEL) {
351		if (PCPU_GET(curpcb)->pcb_onfault != NULL &&
352		    TLB_TAR_CTX(mf->mf_tar) != TLB_CTX_KERNEL) {
353			tf->tf_tpc = (u_long)PCPU_GET(curpcb)->pcb_onfault;
354			tf->tf_tnpc = tf->tf_tpc + 4;
355			return (0);
356		}
357	}
358	return (rv == KERN_PROTECTION_FAILURE ? SIGBUS : SIGSEGV);
359}
360
361/* Maximum number of arguments that can be passed via the out registers. */
362#define	REG_MAXARGS	6
363
364/*
365 * Syscall handler. The arguments to the syscall are passed in the o registers
366 * by the caller, and are saved in the trap frame. The syscall number is passed
367 * in %g1 (and also saved in the trap frame).
368 */
369void
370syscall(struct proc *p, struct trapframe *tf, u_int sticks)
371{
372	struct sysent *callp;
373	u_long code;
374	u_long tpc;
375	int reg;
376	int regcnt;
377	int narg;
378	int error;
379	register_t args[8];
380	void *argp;
381
382	narg = 0;
383	error = 0;
384	reg = 0;
385	regcnt = REG_MAXARGS;
386	code = tf->tf_global[1];
387	atomic_add_int(&cnt.v_syscall, 1);
388	/*
389	 * For syscalls, we don't want to retry the faulting instruction
390	 * (usually), instead we need to advance one instruction.
391	 */
392	tpc = tf->tf_tpc;
393	tf->tf_tpc = tf->tf_tnpc;
394	tf->tf_tnpc += 4;
395
396	if (p->p_sysent->sv_prepsyscall) {
397		/*
398		 * The prep code is not MP aware.
399		 */
400#if 0
401		mtx_lock(&Giant);
402		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
403		mtx_unlock(&Giant);
404#endif
405	} else 	if (code == SYS_syscall || code == SYS___syscall) {
406		code = tf->tf_out[reg++];
407		regcnt--;
408	}
409
410 	if (p->p_sysent->sv_mask)
411 		code &= p->p_sysent->sv_mask;
412
413 	if (code >= p->p_sysent->sv_size)
414 		callp = &p->p_sysent->sv_table[0];
415  	else
416 		callp = &p->p_sysent->sv_table[code];
417
418	narg = callp->sy_narg & SYF_ARGMASK;
419
420	if (narg <= regcnt)
421		argp = &tf->tf_out[reg];
422	else {
423		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
424		    ("Too many syscall arguments!"));
425		argp = args;
426		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
427		error = copyin((void *)(tf->tf_out[6] + SPOFF +
428		    offsetof(struct frame, f_pad[6])),
429		    &args[reg + regcnt], (narg - regcnt) * sizeof(args[0]));
430		if (error != 0)
431			goto bad;
432	}
433
434	/*
435	 * Try to run the syscall without the MP lock if the syscall
436	 * is MP safe.
437	 */
438	if ((callp->sy_narg & SYF_MPSAFE) == 0)
439		mtx_lock(&Giant);
440
441#ifdef KTRACE
442	/*
443	 * We have to obtain the MP lock no matter what if
444	 * we are ktracing
445	 */
446	if (KTRPOINT(p, KTR_SYSCALL)) {
447		if (!mtx_owned(&Giant))
448			mtx_lock(&Giant);
449		ktrsyscall(p->p_tracep, code, narg, args);
450	}
451#endif
452	p->p_retval[0] = 0;
453	p->p_retval[1] = tf->tf_out[1];
454
455	STOPEVENT(p, S_SCE, narg);	/* MP aware */
456
457	error = (*callp->sy_call)(p, argp);
458
459	/*
460	 * MP SAFE (we may or may not have the MP lock at this point)
461	 */
462	switch (error) {
463	case 0:
464		tf->tf_out[0] = p->p_retval[0];
465		tf->tf_out[1] = p->p_retval[1];
466		tf->tf_tstate &= ~TSTATE_XCC_C;
467		break;
468
469	case ERESTART:
470		/*
471		 * Undo the tpc advancement we have done above, we want to
472		 * reexecute the system call.
473		 */
474		tf->tf_tpc = tpc;
475		tf->tf_tnpc -= 4;
476		break;
477
478	case EJUSTRETURN:
479		break;
480
481	default:
482bad:
483 		if (p->p_sysent->sv_errsize) {
484 			if (error >= p->p_sysent->sv_errsize)
485  				error = -1;	/* XXX */
486   			else
487  				error = p->p_sysent->sv_errtbl[error];
488		}
489		tf->tf_out[0] = error;
490		tf->tf_tstate |= TSTATE_XCC_C;
491		break;
492	}
493
494	/*
495	 * Handle reschedule and other end-of-syscall issues
496	 */
497	userret(p, tf, sticks);
498
499#ifdef KTRACE
500	if (KTRPOINT(p, KTR_SYSRET)) {
501		if (!mtx_owned(&Giant))
502			mtx_lock(&Giant);
503		ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
504	}
505#endif
506
507	/*
508	 * Release Giant if we had to get it
509	 */
510	if (mtx_owned(&Giant))
511		mtx_unlock(&Giant);
512
513	/*
514	 * This works because errno is findable through the
515	 * register set.  If we ever support an emulation where this
516	 * is not the case, this code will need to be revisited.
517	 */
518	STOPEVENT(p, S_SCX, code);
519
520#ifdef WITNESS
521	if (witness_list(p)) {
522		panic("system call %s returning with mutex(s) held\n",
523		    syscallnames[code]);
524	}
525#endif
526	mtx_assert(&sched_lock, MA_NOTOWNED);
527	mtx_assert(&Giant, MA_NOTOWNED);
528
529}
530