trap.c revision 206086
1/*-
2 * Copyright (c) 2001, Jake Burkholder
3 * Copyright (C) 1994, David Greenman
4 * Copyright (c) 1990, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the University of Utah, and William Jolitz.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *      This product includes software developed by the University of
21 *      California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	from: @(#)trap.c        7.4 (Berkeley) 5/13/91
39 *	from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19
40 */
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: head/sys/sparc64/sparc64/trap.c 206086 2010-04-02 10:36:40Z marius $");
44
45#include "opt_ddb.h"
46#include "opt_ktr.h"
47#include "opt_ktrace.h"
48
49#include <sys/param.h>
50#include <sys/kdb.h>
51#include <sys/kernel.h>
52#include <sys/bus.h>
53#include <sys/interrupt.h>
54#include <sys/ktr.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#include <sys/systm.h>
58#include <sys/pcpu.h>
59#include <sys/pioctl.h>
60#include <sys/ptrace.h>
61#include <sys/proc.h>
62#include <sys/smp.h>
63#include <sys/signalvar.h>
64#include <sys/syscall.h>
65#include <sys/sysctl.h>
66#include <sys/sysent.h>
67#include <sys/vmmeter.h>
68#ifdef KTRACE
69#include <sys/uio.h>
70#include <sys/ktrace.h>
71#endif
72#include <security/audit/audit.h>
73
74#include <dev/ofw/openfirm.h>
75
76#include <vm/vm.h>
77#include <vm/pmap.h>
78#include <vm/vm_extern.h>
79#include <vm/vm_param.h>
80#include <vm/vm_kern.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83
84#include <machine/cpu.h>
85#include <machine/frame.h>
86#include <machine/intr_machdep.h>
87#include <machine/ofw_machdep.h>
88#include <machine/pcb.h>
89#include <machine/smp.h>
90#include <machine/trap.h>
91#include <machine/tstate.h>
92#include <machine/tte.h>
93#include <machine/tlb.h>
94#include <machine/tsb.h>
95#include <machine/watch.h>
96
/*
 * Decoded system call request.  Filled in by fetch_syscall_args() and
 * consumed by syscall().
 */
struct syscall_args {
	/* System call number, after indirection and sv_mask have been applied. */
	u_long code;
	/* sysent entry to dispatch to (entry 0 when code is out of range). */
	struct sysent *callp;
	/* Argument values gathered from the out registers and user stack. */
	register_t args[8];
	/* Argument vector passed to the handler; set to point at args[]. */
	register_t *argp;
	/* Number of arguments, taken from callp->sy_narg. */
	int narg;
};
104
/* Entry points defined in this file. */
void trap(struct trapframe *tf);
void syscall(struct trapframe *tf);

/* File-local helpers. */
static int fetch_syscall_args(struct thread *td, struct syscall_args *sa);
static int trap_cecc(void);
static int trap_pfault(struct thread *td, struct trapframe *tf);

/*
 * Symbols defined outside this file.  trap() and trap_pfault() compare the
 * trapping PC against the *_nofault_begin/*_nofault_end pairs and, on a
 * match, redirect execution to the corresponding *_fault symbol.
 */
extern char copy_fault[];
extern char copy_nofault_begin[];
extern char copy_nofault_end[];

extern char fs_fault[];
extern char fs_nofault_begin[];
extern char fs_nofault_end[];
extern char fs_nofault_intr_begin[];
extern char fs_nofault_intr_end[];

extern char fas_fault[];
extern char fas_nofault_begin[];
extern char fas_nofault_end[];

/* System call names, indexed by system call number in diagnostics. */
extern char *syscallnames[];
127
/*
 * Human-readable trap names, indexed by trap type with the T_KERNEL flag
 * masked off.  trap() uses these for trace records and panic messages.
 * The indices line up one-to-one with trap_sig[].
 */
const char *const trap_msg[] = {
	[0]  = "reserved",
	[1]  = "instruction access exception",
	[2]  = "instruction access error",
	[3]  = "instruction access protection",
	[4]  = "illtrap instruction",
	[5]  = "illegal instruction",
	[6]  = "privileged opcode",
	[7]  = "floating point disabled",
	[8]  = "floating point exception ieee 754",
	[9]  = "floating point exception other",
	[10] = "tag overflow",
	[11] = "division by zero",
	[12] = "data access exception",
	[13] = "data access error",
	[14] = "data access protection",
	[15] = "memory address not aligned",
	[16] = "privileged action",
	[17] = "async data error",
	[18] = "trap instruction 16",
	[19] = "trap instruction 17",
	[20] = "trap instruction 18",
	[21] = "trap instruction 19",
	[22] = "trap instruction 20",
	[23] = "trap instruction 21",
	[24] = "trap instruction 22",
	[25] = "trap instruction 23",
	[26] = "trap instruction 24",
	[27] = "trap instruction 25",
	[28] = "trap instruction 26",
	[29] = "trap instruction 27",
	[30] = "trap instruction 28",
	[31] = "trap instruction 29",
	[32] = "trap instruction 30",
	[33] = "trap instruction 31",
	[34] = "fast instruction access mmu miss",
	[35] = "fast data access mmu miss",
	[36] = "interrupt",
	[37] = "physical address watchpoint",
	[38] = "virtual address watchpoint",
	[39] = "corrected ecc error",
	[40] = "spill",
	[41] = "fill",
	[42] = "fill",
	[43] = "breakpoint",
	[44] = "clean window",
	[45] = "range check",
	[46] = "fix alignment",
	[47] = "integer overflow",
	[48] = "syscall",
	[49] = "restore physical watchpoint",
	[50] = "restore virtual watchpoint",
	[51] = "kernel stack fault",
};
182
183static const int trap_sig[] = {
184	SIGILL,			/* reserved */
185	SIGILL,			/* instruction access exception */
186	SIGILL,			/* instruction access error */
187	SIGILL,			/* instruction access protection */
188	SIGILL,			/* illtrap instruction */
189	SIGILL,			/* illegal instruction */
190	SIGBUS,			/* privileged opcode */
191	SIGFPE,			/* floating point disabled */
192	SIGFPE,			/* floating point exception ieee 754 */
193	SIGFPE,			/* floating point exception other */
194	SIGEMT,			/* tag overflow */
195	SIGFPE,			/* division by zero */
196	SIGILL,			/* data access exception */
197	SIGILL,			/* data access error */
198	SIGBUS,			/* data access protection */
199	SIGBUS,			/* memory address not aligned */
200	SIGBUS,			/* privileged action */
201	SIGBUS,			/* async data error */
202	SIGILL,			/* trap instruction 16 */
203	SIGILL,			/* trap instruction 17 */
204	SIGILL,			/* trap instruction 18 */
205	SIGILL,			/* trap instruction 19 */
206	SIGILL,			/* trap instruction 20 */
207	SIGILL,			/* trap instruction 21 */
208	SIGILL,			/* trap instruction 22 */
209	SIGILL,			/* trap instruction 23 */
210	SIGILL,			/* trap instruction 24 */
211	SIGILL,			/* trap instruction 25 */
212	SIGILL,			/* trap instruction 26 */
213	SIGILL,			/* trap instruction 27 */
214	SIGILL,			/* trap instruction 28 */
215	SIGILL,			/* trap instruction 29 */
216	SIGILL,			/* trap instruction 30 */
217	SIGILL,			/* trap instruction 31 */
218	SIGSEGV,		/* fast instruction access mmu miss */
219	SIGSEGV,		/* fast data access mmu miss */
220	-1,			/* interrupt */
221	-1,			/* physical address watchpoint */
222	-1,			/* virtual address watchpoint */
223	-1,			/* corrected ecc error */
224	SIGILL,			/* spill */
225	SIGILL,			/* fill */
226	SIGILL,			/* fill */
227	SIGTRAP,		/* breakpoint */
228	SIGILL,			/* clean window */
229	SIGILL,			/* range check */
230	SIGILL,			/* fix alignment */
231	SIGILL,			/* integer overflow */
232	SIGSYS,			/* syscall */
233	-1,			/* restore physical watchpoint */
234	-1,			/* restore virtual watchpoint */
235	-1,			/* kernel stack fault */
236};
237
/* Fail the build if the size of struct trapframe ever drifts from 256 bytes. */
CTASSERT(sizeof(struct trapframe) == 256);

/*
 * debug.debugger_on_signal: when non-zero, trap() enters the kernel
 * debugger before posting signal number 4, 10 or 11 for a user-mode trap.
 */
int debugger_on_signal = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW,
    &debugger_on_signal, 0, "");

/*
 * machdep.corrected_ecc: read-only count of corrected ECC error traps,
 * incremented by trap_cecc().
 */
u_int corrected_ecc = 0;
SYSCTL_UINT(_machdep, OID_AUTO, corrected_ecc, CTLFLAG_RD, &corrected_ecc, 0,
    "corrected ECC errors");
247
248/*
249 * SUNW,set-trap-table allows to take over %tba from the PROM, which
250 * will turn off interrupts and handle outstanding ones while doing so,
251 * in a safe way.
252 */
253void
254sun4u_set_traptable(void *tba_addr)
255{
256	static struct {
257		cell_t name;
258		cell_t nargs;
259		cell_t nreturns;
260		cell_t tba_addr;
261	} args = {
262		(cell_t)"SUNW,set-trap-table",
263		2,
264	};
265
266	args.tba_addr = (cell_t)tba_addr;
267	ofw_entry(&args);
268}
269
270void
271trap(struct trapframe *tf)
272{
273	struct thread *td;
274	struct proc *p;
275	int error;
276	int sig;
277	register_t addr;
278	ksiginfo_t ksi;
279
280	td = curthread;
281
282	CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td,
283	    trap_msg[tf->tf_type & ~T_KERNEL],
284	    (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil));
285
286	PCPU_INC(cnt.v_trap);
287
288	if ((tf->tf_tstate & TSTATE_PRIV) == 0) {
289		KASSERT(td != NULL, ("trap: curthread NULL"));
290		KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));
291
292		p = td->td_proc;
293		td->td_pticks = 0;
294		td->td_frame = tf;
295		addr = tf->tf_tpc;
296		if (td->td_ucred != p->p_ucred)
297			cred_update_thread(td);
298
299		switch (tf->tf_type) {
300		case T_DATA_MISS:
301		case T_DATA_PROTECTION:
302			addr = tf->tf_sfar;
303			/* FALLTHROUGH */
304		case T_INSTRUCTION_MISS:
305			sig = trap_pfault(td, tf);
306			break;
307		case T_FILL:
308			sig = rwindow_load(td, tf, 2);
309			break;
310		case T_FILL_RET:
311			sig = rwindow_load(td, tf, 1);
312			break;
313		case T_SPILL:
314			sig = rwindow_save(td);
315			break;
316		case T_CORRECTED_ECC_ERROR:
317			sig = trap_cecc();
318			break;
319		default:
320			if (tf->tf_type < 0 || tf->tf_type >= T_MAX)
321				panic("trap: bad trap type %#lx (user)",
322				    tf->tf_type);
323			else if (trap_sig[tf->tf_type] == -1)
324				panic("trap: %s (user)",
325				    trap_msg[tf->tf_type]);
326			sig = trap_sig[tf->tf_type];
327			break;
328		}
329
330		if (sig != 0) {
331			/* Translate fault for emulators. */
332			if (p->p_sysent->sv_transtrap != NULL) {
333				sig = p->p_sysent->sv_transtrap(sig,
334				    tf->tf_type);
335			}
336			if (debugger_on_signal &&
337			    (sig == 4 || sig == 10 || sig == 11))
338				kdb_enter(KDB_WHY_TRAPSIG, "trapsig");
339			ksiginfo_init_trap(&ksi);
340			ksi.ksi_signo = sig;
341			ksi.ksi_code = (int)tf->tf_type; /* XXX not POSIX */
342			ksi.ksi_addr = (void *)addr;
343			ksi.ksi_trapno = (int)tf->tf_type;
344			trapsignal(td, &ksi);
345		}
346
347		userret(td, tf);
348		mtx_assert(&Giant, MA_NOTOWNED);
349	} else {
350		KASSERT((tf->tf_type & T_KERNEL) != 0,
351		    ("trap: kernel trap isn't"));
352
353		if (kdb_active) {
354			kdb_reenter();
355			return;
356		}
357
358		switch (tf->tf_type & ~T_KERNEL) {
359		case T_BREAKPOINT:
360		case T_KSTACK_FAULT:
361			error = (kdb_trap(tf->tf_type, 0, tf) == 0);
362			TF_DONE(tf);
363			break;
364#ifdef notyet
365		case T_PA_WATCHPOINT:
366		case T_VA_WATCHPOINT:
367			error = db_watch_trap(tf);
368			break;
369#endif
370		case T_DATA_MISS:
371		case T_DATA_PROTECTION:
372		case T_INSTRUCTION_MISS:
373			error = trap_pfault(td, tf);
374			break;
375		case T_DATA_EXCEPTION:
376		case T_MEM_ADDRESS_NOT_ALIGNED:
377			if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 &&
378			    MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) {
379				if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
380				    tf->tf_tpc <= (u_long)copy_nofault_end) {
381					tf->tf_tpc = (u_long)copy_fault;
382					tf->tf_tnpc = tf->tf_tpc + 4;
383					error = 0;
384					break;
385				}
386				if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
387				    tf->tf_tpc <= (u_long)fs_nofault_end) {
388					tf->tf_tpc = (u_long)fs_fault;
389					tf->tf_tnpc = tf->tf_tpc + 4;
390					error = 0;
391					break;
392				}
393			}
394			error = 1;
395			break;
396		case T_DATA_ERROR:
397			/*
398			 * Handle PCI poke/peek as per UltraSPARC IIi
399			 * User's Manual 16.2.1, modulo checking the
400			 * TPC as USIII CPUs generate a precise trap
401			 * instead of a special deferred one.
402			 */
403			if (tf->tf_tpc > (u_long)fas_nofault_begin &&
404			    tf->tf_tpc < (u_long)fas_nofault_end) {
405				cache_flush();
406				cache_enable(PCPU_GET(impl));
407				tf->tf_tpc = (u_long)fas_fault;
408				tf->tf_tnpc = tf->tf_tpc + 4;
409				error = 0;
410				break;
411			}
412			error = 1;
413			break;
414		case T_CORRECTED_ECC_ERROR:
415			error = trap_cecc();
416			break;
417		default:
418			error = 1;
419			break;
420		}
421
422		if (error != 0) {
423			tf->tf_type &= ~T_KERNEL;
424			if (tf->tf_type < 0 || tf->tf_type >= T_MAX)
425				panic("trap: bad trap type %#lx (kernel)",
426				    tf->tf_type);
427			else if (trap_sig[tf->tf_type] == -1)
428				panic("trap: %s (kernel)",
429				    trap_msg[tf->tf_type]);
430		}
431	}
432	CTR1(KTR_TRAP, "trap: td=%p return", td);
433}
434
435static int
436trap_cecc(void)
437{
438	u_long eee;
439
440	/*
441	 * Turn off (non-)correctable error reporting while we're dealing
442	 * with the error.
443	 */
444	eee = ldxa(0, ASI_ESTATE_ERROR_EN_REG);
445	stxa_sync(0, ASI_ESTATE_ERROR_EN_REG, eee & ~(AA_ESTATE_NCEEN |
446	    AA_ESTATE_CEEN));
447	/* Flush the caches in order ensure no corrupt data got installed. */
448	cache_flush();
449	/* Ensure the caches are still turned on (should be). */
450	cache_enable(PCPU_GET(impl));
451	/* Clear the the error from the AFSR. */
452	stxa_sync(0, ASI_AFSR, ldxa(0, ASI_AFSR));
453	corrected_ecc++;
454	printf("corrected ECC error\n");
455	/* Turn (non-)correctable error reporting back on. */
456	stxa_sync(0, ASI_ESTATE_ERROR_EN_REG, eee);
457	return (0);
458}
459
460static int
461trap_pfault(struct thread *td, struct trapframe *tf)
462{
463	struct vmspace *vm;
464	struct proc *p;
465	vm_offset_t va;
466	vm_prot_t prot;
467	vm_map_entry_t entry;
468	u_long ctx;
469	int type;
470	int rv;
471
472	if (td == NULL)
473		return (-1);
474	KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL"));
475	KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL"));
476	KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL"));
477
478	p = td->td_proc;
479
480	rv = KERN_SUCCESS;
481	ctx = TLB_TAR_CTX(tf->tf_tar);
482	type = tf->tf_type & ~T_KERNEL;
483	va = TLB_TAR_VA(tf->tf_tar);
484
485	CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx",
486	    td, p->p_vmspace->vm_pmap.pm_context[curcpu], va, ctx);
487
488	if (type == T_DATA_PROTECTION)
489		prot = VM_PROT_WRITE;
490	else {
491		if (type == T_DATA_MISS)
492			prot = VM_PROT_READ;
493		else
494			prot = VM_PROT_READ | VM_PROT_EXECUTE;
495	}
496
497	if (ctx != TLB_CTX_KERNEL) {
498		if ((tf->tf_tstate & TSTATE_PRIV) != 0 &&
499		    (tf->tf_tpc >= (u_long)fs_nofault_intr_begin &&
500		    tf->tf_tpc <= (u_long)fs_nofault_intr_end)) {
501			tf->tf_tpc = (u_long)fs_fault;
502			tf->tf_tnpc = tf->tf_tpc + 4;
503			return (0);
504		}
505
506		/*
507		 * This is a fault on non-kernel virtual memory.
508		 */
509		vm = p->p_vmspace;
510
511		/*
512		 * Keep swapout from messing with us during this
513		 * critical time.
514		 */
515		PROC_LOCK(p);
516		++p->p_lock;
517		PROC_UNLOCK(p);
518
519		/* Fault in the user page. */
520		rv = vm_fault(&vm->vm_map, va, prot, VM_FAULT_NORMAL);
521
522		/*
523		 * Now the process can be swapped again.
524		 */
525		PROC_LOCK(p);
526		--p->p_lock;
527		PROC_UNLOCK(p);
528	} else {
529		/*
530		 * This is a fault on kernel virtual memory.  Attempts to
531		 * access kernel memory from user mode cause privileged
532		 * action traps, not page fault.
533		 */
534		KASSERT(tf->tf_tstate & TSTATE_PRIV,
535		    ("trap_pfault: fault on nucleus context from user mode"));
536
537		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
538		    tf->tf_tpc <= (u_long)copy_nofault_end) {
539			vm_map_lock_read(kernel_map);
540			if (vm_map_lookup_entry(kernel_map, va, &entry) &&
541			    (entry->eflags & MAP_ENTRY_NOFAULT) != 0) {
542				tf->tf_tpc = (u_long)copy_fault;
543				tf->tf_tnpc = tf->tf_tpc + 4;
544				vm_map_unlock_read(kernel_map);
545				return (0);
546			}
547			vm_map_unlock_read(kernel_map);
548		}
549
550		/*
551		 * We don't have to worry about process locking or stacks in
552		 * the kernel.
553		 */
554		rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL);
555	}
556
557	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
558	    td, va, rv);
559	if (rv == KERN_SUCCESS)
560		return (0);
561	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
562		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
563		    tf->tf_tpc <= (u_long)fs_nofault_end) {
564			tf->tf_tpc = (u_long)fs_fault;
565			tf->tf_tnpc = tf->tf_tpc + 4;
566			return (0);
567		}
568		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
569		    tf->tf_tpc <= (u_long)copy_nofault_end) {
570			tf->tf_tpc = (u_long)copy_fault;
571			tf->tf_tnpc = tf->tf_tpc + 4;
572			return (0);
573		}
574	}
575	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
576}
577
578/* Maximum number of arguments that can be passed via the out registers. */
579#define	REG_MAXARGS	6
580
581static int
582fetch_syscall_args(struct thread *td, struct syscall_args *sa)
583{
584	struct trapframe *tf;
585	struct proc *p;
586	int reg;
587	int regcnt;
588	int error;
589
590	p = td->td_proc;
591	tf = td->td_frame;
592	reg = 0;
593	regcnt = REG_MAXARGS;
594
595	sa->code = tf->tf_global[1];
596
597	if (p->p_sysent->sv_prepsyscall) {
598#if 0
599		(*p->p_sysent->sv_prepsyscall)(tf, sa->args, &sa->code,
600		    &params);
601#endif
602	} else if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
603		sa->code = tf->tf_out[reg++];
604		regcnt--;
605	}
606
607	if (p->p_sysent->sv_mask)
608		sa->code &= p->p_sysent->sv_mask;
609
610	if (sa->code >= p->p_sysent->sv_size)
611		sa->callp = &p->p_sysent->sv_table[0];
612	else
613		sa->callp = &p->p_sysent->sv_table[sa->code];
614
615	sa->narg = sa->callp->sy_narg;
616	KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]),
617	    ("Too many syscall arguments!"));
618	error = 0;
619	sa->argp = sa->args;
620	bcopy(&tf->tf_out[reg], sa->args, sizeof(sa->args[0]) * regcnt);
621	if (sa->narg > regcnt)
622		error = copyin((void *)(tf->tf_out[6] + SPOFF +
623		    offsetof(struct frame, fr_pad[6])), &sa->args[regcnt],
624		    (sa->narg - regcnt) * sizeof(sa->args[0]));
625
626	/*
627	 * This may result in two records if debugger modified
628	 * registers or memory during sleep at stop/ptrace point.
629	 */
630#ifdef KTRACE
631	if (KTRPOINT(td, KTR_SYSCALL))
632		ktrsyscall(sa->code, sa->narg, sa->argp);
633#endif
634	return (error);
635}
636
637/*
638 * Syscall handler
639 * The arguments to the syscall are passed in the out registers by the caller,
640 * and are saved in the trap frame.  The syscall number is passed in %g1 (and
641 * also saved in the trap frame).
642 */
643void
644syscall(struct trapframe *tf)
645{
646	struct syscall_args sa;
647	struct thread *td;
648	struct proc *p;
649	int error;
650
651	td = curthread;
652	KASSERT(td != NULL, ("trap: curthread NULL"));
653	KASSERT(td->td_proc != NULL, ("trap: curproc NULL"));
654
655	PCPU_INC(cnt.v_syscall);
656	p = td->td_proc;
657	td->td_syscalls++;
658
659	td->td_pticks = 0;
660	td->td_frame = tf;
661	if (td->td_ucred != p->p_ucred)
662		cred_update_thread(td);
663	if ((p->p_flag & P_TRACED) != 0) {
664		PROC_LOCK(p);
665		td->td_dbgflags &= ~TDB_USERWR;
666		PROC_UNLOCK(p);
667	}
668
669	/*
670	 * For syscalls, we don't want to retry the faulting instruction
671	 * (usually), instead we need to advance one instruction.
672	 */
673	td->td_pcb->pcb_tpc = tf->tf_tpc;
674	TF_DONE(tf);
675
676	error = fetch_syscall_args(td, &sa);
677	CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td,
678	    syscallnames[sa.code], sa.argp[0], sa.argp[1], sa.argp[2]);
679
680	if (error == 0) {
681		td->td_retval[0] = 0;
682		td->td_retval[1] = 0;
683
684		STOPEVENT(p, S_SCE, sa.narg);
685		PTRACESTOP_SC(p, td, S_PT_SCE);
686		if ((td->td_dbgflags & TDB_USERWR) != 0) {
687			/*
688			 * Reread syscall number and arguments if
689			 * debugger modified registers or memory.
690			 */
691			error = fetch_syscall_args(td, &sa);
692			if (error != 0)
693				goto retval;
694			td->td_retval[1] = 0;
695		}
696
697		AUDIT_SYSCALL_ENTER(sa.code, td);
698		error = (*sa.callp->sy_call)(td, sa.argp);
699		AUDIT_SYSCALL_EXIT(error, td);
700
701		CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx",
702		    p, error, syscallnames[sa.code], td->td_retval[0],
703		    td->td_retval[1]);
704	}
705 retval:
706	cpu_set_syscall_retval(td, error);
707
708	/*
709	 * Check for misbehavior.
710	 */
711	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
712	    (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
713	    syscallnames[sa.code] : "???");
714	KASSERT(td->td_critnest == 0,
715	    ("System call %s returning in a critical section",
716	    (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
717	    syscallnames[sa.code] : "???"));
718	KASSERT(td->td_locks == 0,
719	    ("System call %s returning with %d locks held",
720	    (sa.code >= 0 && sa.code < SYS_MAXSYSCALL) ?
721	    syscallnames[sa.code] : "???", td->td_locks));
722
723	/*
724	 * Handle reschedule and other end-of-syscall issues.
725	 */
726	userret(td, tf);
727
728#ifdef KTRACE
729	if (KTRPOINT(td, KTR_SYSRET))
730		ktrsysret(sa.code, error, td->td_retval[0]);
731#endif
732	/*
733	 * This works because errno is findable through the
734	 * register set.  If we ever support an emulation where this
735	 * is not the case, this code will need to be revisited.
736	 */
737	STOPEVENT(p, S_SCX, sa.code);
738
739	PTRACESTOP_SC(p, td, S_PT_SCX);
740}
741