subr_syscall.c revision 217819
/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2007 The FreeBSD Foundation
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 */
434Srgrimes
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/subr_trap.c 217819 2011-01-25 10:59:21Z kib $");

#include "opt_ktrace.h"
#include "opt_kdtrace.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmckern.h>
#include <sys/proc.h>
#include <sys/ktr.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif
#include <security/audit/audit.h>

#include <machine/cpu.h>

#ifdef XEN
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#endif

#include <security/mac/mac_framework.h>
84163606Srwatson
8578983Sjhb/*
86167211Srwatson * Define the code needed before returning to user mode, for trap and
87167211Srwatson * syscall.
8878983Sjhb */
8971527Sjhbvoid
90155455Sphkuserret(struct thread *td, struct trapframe *frame)
911690Sdg{
9283366Sjulian	struct proc *p = td->td_proc;
93757Sdg
9499072Sjulian	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
95173601Sjulian            td->td_name);
96197963Skib#if 0
97126661Srwatson#ifdef DIAGNOSTIC
98110190Sjulian	/* Check that we called signotify() enough. */
9978636Sjhb	PROC_LOCK(p);
100170307Sjeff	thread_lock(td);
101112888Sjeff	if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 ||
102111032Sjulian	    (td->td_flags & TDF_ASTPENDING) == 0))
103102266Srwatson		printf("failed to set signal flags properly for ast()\n");
104170307Sjeff	thread_unlock(td);
10582585Sdillon	PROC_UNLOCK(p);
10693793Sbde#endif
107197963Skib#endif
108152376Srwatson#ifdef KTRACE
109152376Srwatson	KTRUSERRET(td);
110152376Srwatson#endif
11193793Sbde	/*
112136837Sphk	 * If this thread tickled GEOM, we need to wait for the giggling to
113136837Sphk	 * stop before we return to userland
114136837Sphk	 */
115136837Sphk	if (td->td_pflags & TDP_GEOM)
116136837Sphk		g_waitidle();
117136837Sphk
118136837Sphk	/*
119110190Sjulian	 * Charge system time if profiling.
120110190Sjulian	 */
121213236Semaste	if (p->p_flag & P_PROFIL)
122155455Sphk		addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
123139324Sjeff	/*
124139324Sjeff	 * Let the scheduler adjust our priority etc.
125139324Sjeff	 */
126139324Sjeff	sched_userret(td);
127144061Sjeff	KASSERT(td->td_locks == 0,
128144061Sjeff	    ("userret: Returning with %d locks held.", td->td_locks));
129184042Skmacy#ifdef XEN
130184042Skmacy	PT_UPDATES_FLUSH();
131184042Skmacy#endif
1321690Sdg}
1331690Sdg
1344Srgrimes/*
13578983Sjhb * Process an asynchronous software trap.
13678983Sjhb * This is relatively easy.
13781493Sjhb * This function will return with preemption disabled.
1384Srgrimes */
139798Swollmanvoid
14099072Sjulianast(struct trapframe *framep)
14165557Sjasone{
142104297Sjhb	struct thread *td;
143104297Sjhb	struct proc *p;
14483366Sjulian	int flags;
14593793Sbde	int sig;
14665557Sjasone
147104297Sjhb	td = curthread;
148104297Sjhb	p = td->td_proc;
149104378Sjmallett
15099072Sjulian	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
15199072Sjulian            p->p_comm);
15272911Sjhb	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
153111883Sjhb	WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode");
15481493Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
155170307Sjeff	THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
15693390Sjake	td->td_frame = framep;
157155455Sphk	td->td_pticks = 0;
158104297Sjhb
15993390Sjake	/*
160172207Sjeff	 * This updates the td_flag's for the checks below in one
16193390Sjake	 * "atomic" operation with turning off the astpending flag.
16293390Sjake	 * If another AST is triggered while we are handling the
163172207Sjeff	 * AST's saved in flags, the astpending flag will be set and
16493390Sjake	 * ast() will be called again.
16593390Sjake	 */
166170307Sjeff	thread_lock(td);
167170307Sjeff	flags = td->td_flags;
168177471Sjeff	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
169177471Sjeff	    TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
170170307Sjeff	thread_unlock(td);
171170292Sattilio	PCPU_INC(cnt.v_trap);
172135573Sjhb
17393390Sjake	if (td->td_ucred != p->p_ucred)
17493390Sjake		cred_update_thread(td);
175132266Sjhb	if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) {
176132266Sjhb		addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
177132266Sjhb		td->td_profil_ticks = 0;
178132266Sjhb		td->td_pflags &= ~TDP_OWEUPC;
179131437Sjhb	}
180172207Sjeff	if (flags & TDF_ALRMPEND) {
18193390Sjake		PROC_LOCK(p);
18293390Sjake		psignal(p, SIGVTALRM);
18393390Sjake		PROC_UNLOCK(p);
18493390Sjake	}
185172207Sjeff	if (flags & TDF_PROFPEND) {
18693390Sjake		PROC_LOCK(p);
18793390Sjake		psignal(p, SIGPROF);
18893390Sjake		PROC_UNLOCK(p);
18993390Sjake	}
190106655Srwatson#ifdef MAC
191172207Sjeff	if (flags & TDF_MACPEND)
192106655Srwatson		mac_thread_userret(td);
193106655Srwatson#endif
194111032Sjulian	if (flags & TDF_NEEDRESCHED) {
195118240Speter#ifdef KTRACE
196118240Speter		if (KTRPOINT(td, KTR_CSW))
197119781Speter			ktrcsw(1, 1);
198118240Speter#endif
199170307Sjeff		thread_lock(td);
200163709Sjb		sched_prio(td, td->td_user_pri);
201178272Sjeff		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
202170307Sjeff		thread_unlock(td);
203118240Speter#ifdef KTRACE
204118240Speter		if (KTRPOINT(td, KTR_CSW))
205119781Speter			ktrcsw(0, 1);
206118240Speter#endif
20793793Sbde	}
208197963Skib
209197963Skib	/*
210197963Skib	 * Check for signals. Unlocked reads of p_pendingcnt or
211197963Skib	 * p_siglist might cause process-directed signal to be handled
212197963Skib	 * later.
213197963Skib	 */
214197963Skib	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
215197963Skib	    !SIGISEMPTY(p->p_siglist)) {
21693793Sbde		PROC_LOCK(p);
217114983Sjhb		mtx_lock(&p->p_sigacts->ps_mtx);
218195702Skib		while ((sig = cursig(td, SIG_STOP_ALLOWED)) != 0)
21993793Sbde			postsig(sig);
220114983Sjhb		mtx_unlock(&p->p_sigacts->ps_mtx);
22193793Sbde		PROC_UNLOCK(p);
22293793Sbde	}
223177471Sjeff	/*
224177471Sjeff	 * We need to check to see if we have to exit or wait due to a
225177471Sjeff	 * single threading requirement or some other STOP condition.
226177471Sjeff	 */
227177471Sjeff	if (flags & TDF_NEEDSUSPCHK) {
228177471Sjeff		PROC_LOCK(p);
229177471Sjeff		thread_suspend_check(0);
230177471Sjeff		PROC_UNLOCK(p);
231177471Sjeff	}
23265557Sjasone
233198508Skib	if (td->td_pflags & TDP_OLDMASK) {
234198508Skib		td->td_pflags &= ~TDP_OLDMASK;
235198508Skib		kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0);
236198508Skib	}
237198508Skib
238155455Sphk	userret(td, framep);
23981493Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
24024691Speter}
241208453Skib
242208453Skib#ifdef HAVE_SYSCALL_ARGS_DEF
243208566Skibconst char *
244208453Skibsyscallname(struct proc *p, u_int code)
245208453Skib{
246208453Skib	static const char unknown[] = "unknown";
247209697Skib	struct sysentvec *sv;
248208453Skib
249209697Skib	sv = p->p_sysent;
250209697Skib	if (sv->sv_syscallnames == NULL || code >= sv->sv_size)
251208453Skib		return (unknown);
252209697Skib	return (sv->sv_syscallnames[code]);
253208453Skib}
254208453Skib
255208453Skibint
256208453Skibsyscallenter(struct thread *td, struct syscall_args *sa)
257208453Skib{
258208453Skib	struct proc *p;
259208453Skib	int error, traced;
260208453Skib
261208453Skib	PCPU_INC(cnt.v_syscall);
262208453Skib	p = td->td_proc;
263208453Skib
264208453Skib	td->td_pticks = 0;
265208453Skib	if (td->td_ucred != p->p_ucred)
266208453Skib		cred_update_thread(td);
267208453Skib	if (p->p_flag & P_TRACED) {
268208453Skib		traced = 1;
269208453Skib		PROC_LOCK(p);
270208453Skib		td->td_dbgflags &= ~TDB_USERWR;
271208453Skib		td->td_dbgflags |= TDB_SCE;
272208453Skib		PROC_UNLOCK(p);
273208453Skib	} else
274208453Skib		traced = 0;
275208453Skib	error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
276208453Skib#ifdef KTRACE
277208453Skib	if (KTRPOINT(td, KTR_SYSCALL))
278208453Skib		ktrsyscall(sa->code, sa->narg, sa->args);
279208453Skib#endif
280208453Skib
281208453Skib	CTR6(KTR_SYSC,
282208453Skib"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
283208453Skib	    td, td->td_proc->p_pid, syscallname(p, sa->code),
284208453Skib	    sa->args[0], sa->args[1], sa->args[2]);
285208453Skib
286208453Skib	if (error == 0) {
287208453Skib		STOPEVENT(p, S_SCE, sa->narg);
288208453Skib		PTRACESTOP_SC(p, td, S_PT_SCE);
289208453Skib		if (td->td_dbgflags & TDB_USERWR) {
290208453Skib			/*
291208453Skib			 * Reread syscall number and arguments if
292208453Skib			 * debugger modified registers or memory.
293208453Skib			 */
294208453Skib			error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
295208453Skib#ifdef KTRACE
296208453Skib			if (KTRPOINT(td, KTR_SYSCALL))
297208453Skib				ktrsyscall(sa->code, sa->narg, sa->args);
298208453Skib#endif
299208453Skib			if (error != 0)
300208453Skib				goto retval;
301208453Skib		}
302209579Skib		error = syscall_thread_enter(td, sa->callp);
303209579Skib		if (error != 0)
304209579Skib			goto retval;
305208453Skib
306208453Skib#ifdef KDTRACE_HOOKS
307208453Skib		/*
308208453Skib		 * If the systrace module has registered it's probe
309208453Skib		 * callback and if there is a probe active for the
310208453Skib		 * syscall 'entry', process the probe.
311208453Skib		 */
312208453Skib		if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
313208453Skib			(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
314211617Srpaulo			    sa->callp, sa->args, 0);
315208453Skib#endif
316208453Skib
317208453Skib		AUDIT_SYSCALL_ENTER(sa->code, td);
318208453Skib		error = (sa->callp->sy_call)(td, sa->args);
319208453Skib		AUDIT_SYSCALL_EXIT(error, td);
320208453Skib
321208453Skib		/* Save the latest error return value. */
322208453Skib		td->td_errno = error;
323208453Skib
324208453Skib#ifdef KDTRACE_HOOKS
325208453Skib		/*
326208453Skib		 * If the systrace module has registered it's probe
327208453Skib		 * callback and if there is a probe active for the
328208453Skib		 * syscall 'return', process the probe.
329208453Skib		 */
330208453Skib		if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
331208453Skib			(*systrace_probe_func)(sa->callp->sy_return, sa->code,
332211617Srpaulo			    sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
333208453Skib#endif
334209579Skib		syscall_thread_exit(td, sa->callp);
335208453Skib		CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
336208453Skib		    p, error, td->td_retval[0], td->td_retval[1]);
337208453Skib	}
338208453Skib retval:
339208453Skib	if (traced) {
340208453Skib		PROC_LOCK(p);
341208453Skib		td->td_dbgflags &= ~TDB_SCE;
342208453Skib		PROC_UNLOCK(p);
343208453Skib	}
344208453Skib	(p->p_sysent->sv_set_syscall_retval)(td, error);
345208453Skib	return (error);
346208453Skib}
347208453Skib
348208453Skibvoid
349208453Skibsyscallret(struct thread *td, int error, struct syscall_args *sa __unused)
350208453Skib{
351208453Skib	struct proc *p;
352208453Skib	int traced;
353208453Skib
354208453Skib	p = td->td_proc;
355208453Skib
356208453Skib	/*
357208453Skib	 * Check for misbehavior.
358208453Skib	 */
359208453Skib	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
360208453Skib	    syscallname(p, sa->code));
361208453Skib	KASSERT(td->td_critnest == 0,
362208453Skib	    ("System call %s returning in a critical section",
363208453Skib	    syscallname(p, sa->code)));
364208453Skib	KASSERT(td->td_locks == 0,
365208453Skib	    ("System call %s returning with %d locks held",
366208453Skib	     syscallname(p, sa->code), td->td_locks));
367208453Skib
368208453Skib	/*
369208453Skib	 * Handle reschedule and other end-of-syscall issues
370208453Skib	 */
371208453Skib	userret(td, td->td_frame);
372208453Skib
373208453Skib	CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s",
374208453Skib	    syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name);
375208453Skib
376208453Skib#ifdef KTRACE
377208453Skib	if (KTRPOINT(td, KTR_SYSRET))
378208453Skib		ktrsysret(sa->code, error, td->td_retval[0]);
379208453Skib#endif
380208453Skib
381208453Skib	if (p->p_flag & P_TRACED) {
382208453Skib		traced = 1;
383208453Skib		PROC_LOCK(p);
384208453Skib		td->td_dbgflags |= TDB_SCX;
385208453Skib		PROC_UNLOCK(p);
386208453Skib	} else
387208453Skib		traced = 0;
388208453Skib	/*
389208453Skib	 * This works because errno is findable through the
390208453Skib	 * register set.  If we ever support an emulation where this
391208453Skib	 * is not the case, this code will need to be revisited.
392208453Skib	 */
393208453Skib	STOPEVENT(p, S_SCX, sa->code);
394208453Skib	PTRACESTOP_SC(p, td, S_PT_SCX);
395217819Skib	if (traced || (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0) {
396208453Skib		PROC_LOCK(p);
397217819Skib		td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK);
398208453Skib		PROC_UNLOCK(p);
399208453Skib	}
400208453Skib}
401208453Skib#endif /* HAVE_SYSCALL_ARGS_DEF */
402