trap.c revision 103494
1/*-
2 * Copyright (c) 2001, Jake Burkholder
3 * Copyright (C) 1994, David Greenman
4 * Copyright (c) 1990, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the University of Utah, and William Jolitz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed by the University of
21 *      California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
39 * 	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
40 * $FreeBSD: head/sys/sparc64/sparc64/trap.c 103494 2002-09-17 16:21:48Z jake $
41 */
42
43#include "opt_ddb.h"
44#include "opt_ktr.h"
45#include "opt_ktrace.h"
46
47#include <sys/param.h>
48#include <sys/kernel.h>
49#include <sys/bus.h>
50#include <sys/interrupt.h>
51#include <sys/ktr.h>
52#include <sys/kse.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/systm.h>
56#include <sys/pioctl.h>
57#include <sys/proc.h>
58#include <sys/smp.h>
59#include <sys/syscall.h>
60#include <sys/sysctl.h>
61#include <sys/sysent.h>
62#include <sys/user.h>
63#include <sys/vmmeter.h>
64#ifdef KTRACE
65#include <sys/uio.h>
66#include <sys/ktrace.h>
67#endif
68
69#include <vm/vm.h>
70#include <vm/pmap.h>
71#include <vm/vm_extern.h>
72#include <vm/vm_param.h>
73#include <vm/vm_kern.h>
74#include <vm/vm_map.h>
75#include <vm/vm_page.h>
76
77#include <machine/clock.h>
78#include <machine/frame.h>
79#include <machine/intr_machdep.h>
80#include <machine/pcb.h>
81#include <machine/smp.h>
82#include <machine/trap.h>
83#include <machine/tstate.h>
84#include <machine/tte.h>
85#include <machine/tlb.h>
86#include <machine/tsb.h>
87#include <machine/watch.h>
88
89void trap(struct trapframe *tf);
90void syscall(struct trapframe *tf);
91
92static int trap_pfault(struct thread *td, struct trapframe *tf);
93
94extern char copy_fault[];
95extern char copy_nofault_begin[];
96extern char copy_nofault_end[];
97
98extern char fs_fault[];
99extern char fs_nofault_begin[];
100extern char fs_nofault_end[];
101extern char fs_nofault_intr_begin[];
102extern char fs_nofault_intr_end[];
103
104extern char *syscallnames[];
105
/*
 * Human-readable names for the trap types, indexed by the T_* trap type
 * codes from <machine/trap.h> (with T_KERNEL masked off before indexing;
 * see the CTR4() and panic() calls in trap() below).  The order of the
 * entries must match the numeric T_* definitions exactly.
 */
const char *trap_msg[] = {
	"reserved",
	"instruction access exception",
	"instruction access error",
	"instruction access protection",
	"illtrap instruction",
	"illegal instruction",
	"privileged opcode",
	"floating point disabled",
	"floating point exception ieee 754",
	"floating point exception other",
	"tag overflow",
	"division by zero",
	"data access exception",
	"data access error",
	"data access protection",
	"memory address not aligned",
	"privileged action",
	"async data error",
	"trap instruction 16",
	"trap instruction 17",
	"trap instruction 18",
	"trap instruction 19",
	"trap instruction 20",
	"trap instruction 21",
	"trap instruction 22",
	"trap instruction 23",
	"trap instruction 24",
	"trap instruction 25",
	"trap instruction 26",
	"trap instruction 27",
	"trap instruction 28",
	"trap instruction 29",
	"trap instruction 30",
	"trap instruction 31",
	"interrupt",
	"physical address watchpoint",
	"virtual address watchpoint",
	"corrected ecc error",
	"fast instruction access mmu miss",
	"fast data access mmu miss",
	"spill",
	"fill",
	"fill",
	"breakpoint",
	"clean window",
	"range check",
	"fix alignment",
	"integer overflow",
	"syscall",
	"restore physical watchpoint",
	"restore virtual watchpoint",
	"kernel stack fault",
};
160
/*
 * debug.debugger_on_signal sysctl (read/write): when non-zero, enter the
 * kernel debugger before posting signal 4, 10 or 11 (SIGILL, SIGBUS,
 * SIGSEGV) for a user-mode trap; see the trapsig path in trap() below.
 */
int debugger_on_signal = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW,
    &debugger_on_signal, 0, "");
164
/*
 * Handle a trap or fault described by the trap frame tf.  tf->tf_type
 * holds the T_* trap type code, with T_KERNEL or'ed in for traps taken
 * while in privileged mode.  User-mode traps either post a signal
 * (trapsig), or service the trap (page fault, register window
 * spill/fill) and return to the process (user).  Kernel-mode traps are
 * serviced and resumed (out); any unhandled trap panics.
 */
void
trap(struct trapframe *tf)
{
	struct thread *td;
	struct proc *p;
	u_int sticks;		/* system tick count at trap entry, for userret() */
	int error;
	int ucode;		/* signal code passed to trapsignal() */
#ifdef DDB
	int mask;		/* watchpoint mask, filled in by watch_*_get() */
#endif
	int type;
	int sig;

	KASSERT(PCPU_GET(curthread) != NULL, ("trap: curthread NULL"));
	KASSERT(PCPU_GET(curthread)->td_kse != NULL, ("trap: curkse NULL"));
	KASSERT(PCPU_GET(curthread)->td_proc != NULL, ("trap: curproc NULL"));

	/* Account for this trap in the vmmeter statistics. */
	atomic_add_int(&cnt.v_trap, 1);

	td = PCPU_GET(curthread);
	p = td->td_proc;

	error = 0;
	type = tf->tf_type;
	ucode = type;	/* XXX */

	CTR4(KTR_TRAP, "trap: %s type=%s (%s) pil=%#lx",
	    p->p_comm, trap_msg[type & ~T_KERNEL],
	    ((type & T_KERNEL) ? "kernel" : "user"),
	    rdpr(pil));

	if ((type & T_KERNEL) == 0) {
		/* Trap from user mode; set up state for userret() below. */
		sticks = td->td_kse->ke_sticks;
		td->td_frame = tf;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);
		/*
		 * If the process is exiting and this is not the thread
		 * performing the single-threading, exit this thread now.
		 */
		if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
			PROC_LOCK(p);
			mtx_lock_spin(&sched_lock);
			thread_exit();
			/* NOTREACHED */
		}
 	} else {
 		sticks = 0;
		/* Kernel breakpoints may fire before credentials are set up. */
if ((type & ~T_KERNEL) != T_BREAKPOINT)
		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
	}

	switch (type) {

	/*
	 * User Mode Traps
	 */
	case T_MEM_ADDRESS_NOT_ALIGNED:
		sig = SIGILL;
		goto trapsig;
#if 0
	case T_ALIGN_LDDF:
	case T_ALIGN_STDF:
		sig = SIGBUS;
		goto trapsig;
#endif
	case T_BREAKPOINT:
		sig = SIGTRAP;
		goto trapsig;
	case T_DIVISION_BY_ZERO:
		sig = SIGFPE;
		goto trapsig;
	case T_FP_DISABLED:
	case T_FP_EXCEPTION_IEEE_754:
	case T_FP_EXCEPTION_OTHER:
		sig = SIGFPE;
		goto trapsig;
	case T_DATA_ERROR:
	case T_DATA_EXCEPTION:
	case T_INSTRUCTION_ERROR:
	case T_INSTRUCTION_EXCEPTION:
		sig = SIGILL;	/* XXX */
		goto trapsig;
	case T_DATA_MISS:
	case T_DATA_PROTECTION:
	case T_INSTRUCTION_MISS:
		/* Page fault: 0 means resolved, else a signal number. */
		error = trap_pfault(td, tf);
		if (error == 0)
			goto user;
		sig = error;
		goto trapsig;
	case T_FILL:
		/* Reload user register windows from the user stack. */
		if (rwindow_load(td, tf, 2)) {
			PROC_LOCK(p);
			sigexit(td, SIGILL);
			/* Not reached. */
		}
		goto user;
	case T_FILL_RET:
		if (rwindow_load(td, tf, 1)) {
			PROC_LOCK(p);
			sigexit(td, SIGILL);
			/* Not reached. */
		}
		goto user;
	case T_ILLEGAL_INSTRUCTION:
		sig = SIGILL;
		goto trapsig;
	case T_PRIVILEGED_ACTION:
	case T_PRIVILEGED_OPCODE:
		sig = SIGBUS;
		goto trapsig;
	case T_TRAP_INSTRUCTION_16:
	case T_TRAP_INSTRUCTION_17:
	case T_TRAP_INSTRUCTION_18:
	case T_TRAP_INSTRUCTION_19:
	case T_TRAP_INSTRUCTION_20:
	case T_TRAP_INSTRUCTION_21:
	case T_TRAP_INSTRUCTION_22:
	case T_TRAP_INSTRUCTION_23:
	case T_TRAP_INSTRUCTION_24:
	case T_TRAP_INSTRUCTION_25:
	case T_TRAP_INSTRUCTION_26:
	case T_TRAP_INSTRUCTION_27:
	case T_TRAP_INSTRUCTION_28:
	case T_TRAP_INSTRUCTION_29:
	case T_TRAP_INSTRUCTION_30:
	case T_TRAP_INSTRUCTION_31:
		sig = SIGILL;
		goto trapsig;
	case T_SPILL:
		/* Save user register windows to the user stack. */
		if (rwindow_save(td)) {
			PROC_LOCK(p);
			sigexit(td, SIGILL);
			/* Not reached. */
		}
		goto user;
	case T_TAG_OFERFLOW:	/* sic: spelling matches <machine/trap.h> */
		sig = SIGEMT;
		goto trapsig;

	/*
	 * Kernel Mode Traps
	 */
#ifdef DDB
	case T_BREAKPOINT | T_KERNEL:
	case T_KSTACK_FAULT | T_KERNEL:
		if (kdb_trap(tf) != 0)
			goto out;
		break;
#endif
	case T_DATA_EXCEPTION | T_KERNEL:
	case T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL:
		/*
		 * Faults on user addresses (ASI_AIUP) taken inside the
		 * registered nofault regions of the copy and fuword/suword
		 * style routines are redirected to their fault handlers.
		 */
		if ((tf->tf_sfsr & MMU_SFSR_FV) == 0 ||
		    MMU_SFSR_GET_ASI(tf->tf_sfsr) != ASI_AIUP)
			break;
		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
		    tf->tf_tpc <= (u_long)copy_nofault_end) {
			tf->tf_tpc = (u_long)copy_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			goto out;
		}
		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
		    tf->tf_tpc <= (u_long)fs_nofault_end) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			goto out;
		}
		break;
	case T_DATA_MISS | T_KERNEL:
	case T_DATA_PROTECTION | T_KERNEL:
	case T_INSTRUCTION_MISS | T_KERNEL:
		error = trap_pfault(td, tf);
		if (error == 0)
			goto out;
		break;
#ifdef DDB
	case T_PA_WATCHPOINT | T_KERNEL:
		/* Same evil single-step scheme as T_VA_WATCHPOINT below. */
		TR3("trap: watch phys pa=%#lx tpc=%#lx, tnpc=%#lx",
		    watch_phys_get(&mask), tf->tf_tpc, tf->tf_tnpc);
		PCPU_SET(wp_pstate, (tf->tf_tstate & TSTATE_PSTATE_MASK) >>
		    TSTATE_PSTATE_SHIFT);
		tf->tf_tstate &= ~TSTATE_IE;
		intr_disable();
		PCPU_SET(wp_insn, *((u_int *)tf->tf_tnpc));
		*((u_int *)tf->tf_tnpc) = 0x91d03002;	/* ta %xcc, 2 */
		flush(tf->tf_tnpc);
		PCPU_SET(wp_va, watch_phys_get(&mask));
		PCPU_SET(wp_mask, mask);
		watch_phys_clear();
		goto out;
	case T_VA_WATCHPOINT | T_KERNEL:
		/*
		 * At the moment, just print the information from the trap,
		 * remove the watchpoint, use evil magic to execute the
		 * instruction (we temporarily save the instruction at
		 * %tnpc, write a trap instruction, resume, and reset the
		 * watch point when the trap arrives).
		 * To make sure that no interrupt gets in between and creates
		 * a potentially large window where the watchpoint is inactive,
		 * disable interrupts temporarily.
		 * This is obviously fragile and evilish.
		 */
		TR3("trap: watch virt pa=%#lx tpc=%#lx, tnpc=%#lx",
		    watch_virt_get(&mask), tf->tf_tpc, tf->tf_tnpc);
		PCPU_SET(wp_pstate, (tf->tf_tstate & TSTATE_PSTATE_MASK) >>
		    TSTATE_PSTATE_SHIFT);
		tf->tf_tstate &= ~TSTATE_IE;
		/*
		 * This has no matching intr_restore; the PSTATE_IE state of the
		 * trapping code will be restored when the watch point is
		 * restored.
		 */
		intr_disable();
		PCPU_SET(wp_insn, *((u_int *)tf->tf_tnpc));
		*((u_int *)tf->tf_tnpc) = 0x91d03003;	/* ta %xcc, 3 */
		flush(tf->tf_tnpc);
		PCPU_SET(wp_va, watch_virt_get(&mask));
		PCPU_SET(wp_mask, mask);
		watch_virt_clear();
		goto out;
	case T_RSTRWP_PHYS | T_KERNEL:
		/* Restore the saved instruction and re-arm the watchpoint. */
		tf->tf_tstate = (tf->tf_tstate & ~TSTATE_PSTATE_MASK) |
		    PCPU_GET(wp_pstate) << TSTATE_PSTATE_SHIFT;
		watch_phys_set_mask(PCPU_GET(wp_va), PCPU_GET(wp_mask));
		*(u_int *)tf->tf_tpc = PCPU_GET(wp_insn);
		flush(tf->tf_tpc);
		goto out;
	case T_RSTRWP_VIRT | T_KERNEL:
		/*
		 * Undo the tweaks done for T_VA_WATCHPOINT, reset the watch
		 * point and continue execution.
		 * Note that here, we run with interrupts enabled, so there
		 * is a small chance that we will be interrupted before we
		 * could reset the watch point.
		 */
		tf->tf_tstate = (tf->tf_tstate & ~TSTATE_PSTATE_MASK) |
		    PCPU_GET(wp_pstate) << TSTATE_PSTATE_SHIFT;
		watch_virt_set_mask(PCPU_GET(wp_va), PCPU_GET(wp_mask));
		*(u_int *)tf->tf_tpc = PCPU_GET(wp_insn);
		flush(tf->tf_tpc);
		goto out;
#endif
	default:
		break;
	}
	/* Falling out of the switch means the trap was not handled. */
	panic("trap: %s", trap_msg[type & ~T_KERNEL]);

trapsig:
	/* Translate fault for emulators. */
	if (p->p_sysent->sv_transtrap != NULL)
		sig = (p->p_sysent->sv_transtrap)(sig, type);
	/* Optionally drop into DDB for SIGILL/SIGBUS/SIGSEGV. */
	if (debugger_on_signal && (sig == 4 || sig == 10 || sig == 11))
		Debugger("trapsig");
	trapsignal(p, sig, ucode);
user:
	/* Return-to-user processing: rescheduling, signals, profiling. */
	userret(td, tf, sticks);
	mtx_assert(&Giant, MA_NOTOWNED);
#ifdef DIAGNOSTIC
	cred_free_thread(td);
#endif
out:
	CTR1(KTR_TRAP, "trap: td=%p return", td);
	return;
}
428
/*
 * Service a page fault taken by thread td, described by trap frame tf.
 * Returns 0 if the fault was resolved (or redirected to a nofault
 * handler), otherwise the signal number to post: SIGBUS for protection
 * failures, SIGSEGV for everything else.
 */
static int
trap_pfault(struct thread *td, struct trapframe *tf)
{
	struct vmspace *vm;
	struct pcb *pcb;
	struct proc *p;
	vm_offset_t va;		/* faulting virtual address */
	vm_prot_t prot;		/* access type derived from the trap type */
	u_long ctx;		/* MMU context of the faulting access */
	int flags;
	int type;
	int rv;

	p = td->td_proc;
	KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL"));
	KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace NULL"));

	rv = KERN_SUCCESS;
	ctx = TLB_TAR_CTX(tf->tf_tar);
	pcb = td->td_pcb;
	type = tf->tf_type & ~T_KERNEL;
	va = TLB_TAR_VA(tf->tf_tar);

	CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx",
	    td, p->p_vmspace->vm_pmap.pm_context[PCPU_GET(cpuid)], va, ctx);

	if (type == T_DATA_PROTECTION) {
		prot = VM_PROT_WRITE;
		flags = VM_FAULT_DIRTY;
	} else {
		if (type == T_DATA_MISS)
			prot = VM_PROT_READ;
		else
			prot = VM_PROT_READ | VM_PROT_EXECUTE;
		flags = VM_FAULT_NORMAL;
	}

	if (ctx != TLB_CTX_KERNEL) {
		/*
		 * Kernel-mode faults on user addresses from within the
		 * interrupt-safe fuword/suword nofault region are redirected
		 * straight to the fault handler without calling vm_fault().
		 */
		if ((tf->tf_tstate & TSTATE_PRIV) != 0 &&
		    (tf->tf_tpc >= (u_long)fs_nofault_intr_begin &&
		     tf->tf_tpc <= (u_long)fs_nofault_intr_end)) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 */
		vm = p->p_vmspace;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);

		/* Fault in the user page. */
		rv = vm_fault(&vm->vm_map, va, prot, flags);

		/*
		 * Now the process can be swapped again.
		 */
		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	} else {
		/*
		 * This is a fault on kernel virtual memory.  Attempts to
		 * access kernel memory from user mode cause privileged
		 * action traps, not page fault.
		 */
		KASSERT(tf->tf_tstate & TSTATE_PRIV,
		    ("trap_pfault: fault on nucleus context from user mode"));

		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
	}

	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
	    td, va, rv);
	if (rv == KERN_SUCCESS)
		return (0);
	/*
	 * An unresolved kernel-mode fault on a user address: redirect to
	 * the copy/fetch-store fault handlers if the pc lies in one of
	 * their nofault regions, so copyin() etc. can return EFAULT.
	 */
	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
		    tf->tf_tpc <= (u_long)fs_nofault_end) {
			tf->tf_tpc = (u_long)fs_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
		    tf->tf_tpc <= (u_long)copy_nofault_end) {
			tf->tf_tpc = (u_long)copy_fault;
			tf->tf_tnpc = tf->tf_tpc + 4;
			return (0);
		}
	}
	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
533
/* Maximum number of arguments that can be passed via the out registers. */
#define	REG_MAXARGS	6

/*
 * Syscall handler. The arguments to the syscall are passed in the o registers
 * by the caller, and are saved in the trap frame. The syscall number is passed
 * in %g1 (and also saved in the trap frame).  Arguments beyond REG_MAXARGS
 * are copied in from the caller's stack frame.
 */
void
syscall(struct trapframe *tf)
{
	struct sysent *callp;	/* syscall table entry for this call */
	struct thread *td;
	register_t args[8];	/* staging buffer when args spill to the stack */
	register_t *argp;	/* points at the actual argument vector */
	struct proc *p;
	u_int sticks;		/* tick count at entry, for userret() */
	u_long code;		/* syscall number */
	u_long tpc;		/* saved %tpc, for ERESTART */
	int reg;		/* first out register holding an argument */
	int regcnt;		/* number of argument-carrying out registers */
	int narg;		/* number of arguments the syscall takes */
	int error;

	KASSERT(PCPU_GET(curthread) != NULL, ("trap: curthread NULL"));
	KASSERT(PCPU_GET(curthread)->td_kse != NULL, ("trap: curkse NULL"));
	KASSERT(PCPU_GET(curthread)->td_proc != NULL, ("trap: curproc NULL"));

	/* Account for this call in the vmmeter statistics. */
	atomic_add_int(&cnt.v_syscall, 1);

	td = PCPU_GET(curthread);
	p = td->td_proc;

	narg = 0;
	error = 0;
	reg = 0;
	regcnt = REG_MAXARGS;

	sticks = td->td_kse->ke_sticks;
	td->td_frame = tf;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_KSES) {
		/*
		 * If we are doing a syscall in a KSE environment,
		 * note where our mailbox is. There is always the
		 * possibility that we could do this lazily (in sleep()),
		 * but for now do it every time.
		 */
		td->td_mailbox = (void *)fuword((caddr_t)td->td_kse->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;  /* single thread it.. */
			td->td_flags &= ~TDF_UNBOUND;
		} else {
			td->td_flags |= TDF_UNBOUND;
		}
	}
	code = tf->tf_global[1];

	/*
	 * For syscalls, we don't want to retry the faulting instruction
	 * (usually), instead we need to advance one instruction.
	 */
	tpc = tf->tf_tpc;
	TF_DONE(tf);

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is MP aware.
		 */
#if 0
		(*p->p_sysent->sv_prepsyscall)(tf, args, &code, &params);
#endif
	} else 	if (code == SYS_syscall || code == SYS___syscall) {
		/* Indirect syscall: the real number is the first argument. */
		code = tf->tf_out[reg++];
		regcnt--;
	}

 	if (p->p_sysent->sv_mask)
 		code &= p->p_sysent->sv_mask;

	/* Out-of-range numbers dispatch to entry 0 (nosys). */
 	if (code >= p->p_sysent->sv_size)
 		callp = &p->p_sysent->sv_table[0];
  	else
 		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

	if (narg <= regcnt) {
		/* All arguments fit in the out registers; use them in place. */
		argp = &tf->tf_out[reg];
		error = 0;
	} else {
		/*
		 * Arguments spill past the out registers; gather the register
		 * part and copy the rest in from the caller's stack frame.
		 */
		KASSERT(narg <= sizeof(args) / sizeof(args[0]),
		    ("Too many syscall arguments!"));
		argp = args;
		bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt);
		error = copyin((void *)(tf->tf_out[6] + SPOFF +
		    offsetof(struct frame, fr_pad[6])),
		    &args[regcnt], (narg - regcnt) * sizeof(args[0]));
	}

	CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td,
	    syscallnames[code], argp[0], argp[1], argp[2]);

	/*
	 * Try to run the syscall without the MP lock if the syscall
	 * is MP safe.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_lock(&Giant);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, narg, argp);
#endif
	if (error == 0) {
		td->td_retval[0] = 0;
		td->td_retval[1] = 0;

		STOPEVENT(p, S_SCE, narg);	/* MP aware */

		error = (*callp->sy_call)(td, argp);

		CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p,
		    error, syscallnames[code], td->td_retval[0],
		    td->td_retval[1]);
	}

	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	switch (error) {
	case 0:
		/* Success: return values in %o0/%o1, carry bit cleared. */
		tf->tf_out[0] = td->td_retval[0];
		tf->tf_out[1] = td->td_retval[1];
		tf->tf_tstate &= ~TSTATE_XCC_C;
		break;

	case ERESTART:
		/*
		 * Undo the tpc advancement we have done above, we want to
		 * reexecute the system call.
		 */
		tf->tf_tpc = tpc;
		tf->tf_tnpc -= 4;
		break;

	case EJUSTRETURN:
		break;

	default:
		/* Failure: translate the errno if required, set carry bit. */
 		if (p->p_sysent->sv_errsize) {
 			if (error >= p->p_sysent->sv_errsize)
  				error = -1;	/* XXX */
   			else
  				error = p->p_sysent->sv_errtbl[error];
		}
		tf->tf_out[0] = error;
		tf->tf_tstate |= TSTATE_XCC_C;
		break;
	}

	/*
	 * Release Giant if we had to get it.  Don't use mtx_owned(),
	 * we want to catch broken syscalls.
	 */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_unlock(&Giant);

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(td, tf, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif
	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

#ifdef DIAGNOSTIC
	cred_free_thread(td);
#endif
#ifdef WITNESS
	if (witness_list(td)) {
		panic("system call %s returning with mutex(s) held\n",
		    syscallnames[code]);
	}
#endif
	mtx_assert(&sched_lock, MA_NOTOWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
}
733