subr_syscall.c revision 163709
1130803Smarcel/*-
2130803Smarcel * Copyright (C) 1994, David Greenman
3130803Smarcel * Copyright (c) 1990, 1993
4130803Smarcel *	The Regents of the University of California.  All rights reserved.
5130803Smarcel *
6130803Smarcel * This code is derived from software contributed to Berkeley by
7130803Smarcel * the University of Utah, and William Jolitz.
8130803Smarcel *
9130803Smarcel * Redistribution and use in source and binary forms, with or without
10130803Smarcel * modification, are permitted provided that the following conditions
11130803Smarcel * are met:
12130803Smarcel * 1. Redistributions of source code must retain the above copyright
13130803Smarcel *    notice, this list of conditions and the following disclaimer.
14130803Smarcel * 2. Redistributions in binary form must reproduce the above copyright
15130803Smarcel *    notice, this list of conditions and the following disclaimer in the
16130803Smarcel *    documentation and/or other materials provided with the distribution.
17130803Smarcel * 3. All advertising materials mentioning features or use of this software
18130803Smarcel *    must display the following acknowledgement:
19130803Smarcel *	This product includes software developed by the University of
20130803Smarcel *	California, Berkeley and its contributors.
21130803Smarcel * 4. Neither the name of the University nor the names of its contributors
22130803Smarcel *    may be used to endorse or promote products derived from this software
23130803Smarcel *    without specific prior written permission.
24130803Smarcel *
25130803Smarcel * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26130803Smarcel * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27130803Smarcel * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28130803Smarcel * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29130803Smarcel * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30130803Smarcel * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31130803Smarcel * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32130803Smarcel * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33130803Smarcel * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34130803Smarcel * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35130803Smarcel * SUCH DAMAGE.
36130803Smarcel *
37130803Smarcel *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
38130803Smarcel */
39130803Smarcel
40130803Smarcel#include <sys/cdefs.h>
41130803Smarcel__FBSDID("$FreeBSD: head/sys/kern/subr_trap.c 163709 2006-10-26 21:42:22Z jb $");
42130803Smarcel
43130803Smarcel#include "opt_ktrace.h"
44130803Smarcel#include "opt_mac.h"
45130803Smarcel#ifdef __i386__
46130803Smarcel#include "opt_npx.h"
47130803Smarcel#endif
48130803Smarcel
49130803Smarcel#include <sys/param.h>
50130803Smarcel#include <sys/bus.h>
51130803Smarcel#include <sys/kernel.h>
52130803Smarcel#include <sys/lock.h>
53130803Smarcel#include <sys/mutex.h>
54130803Smarcel#include <sys/proc.h>
55130803Smarcel#include <sys/ktr.h>
56130803Smarcel#include <sys/resourcevar.h>
57130803Smarcel#include <sys/sched.h>
58130803Smarcel#include <sys/signalvar.h>
59130803Smarcel#include <sys/systm.h>
60130803Smarcel#include <sys/vmmeter.h>
61130803Smarcel#ifdef KTRACE
62130803Smarcel#include <sys/uio.h>
63130803Smarcel#include <sys/ktrace.h>
64130803Smarcel#endif
65130803Smarcel
66130803Smarcel#include <machine/cpu.h>
67130803Smarcel#include <machine/pcb.h>
68130803Smarcel
69130803Smarcel#include <security/mac/mac_framework.h>
70130803Smarcel
71130803Smarcel/*
72130803Smarcel * Define the code needed before returning to user mode, for
73130803Smarcel * trap and syscall.
74130803Smarcel *
75130803Smarcel * MPSAFE
76130803Smarcel */
77130803Smarcelvoid
78130803Smarceluserret(struct thread *td, struct trapframe *frame)
79130803Smarcel{
80130803Smarcel	struct proc *p = td->td_proc;
81130803Smarcel
82130803Smarcel	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
83130803Smarcel            p->p_comm);
84130803Smarcel#ifdef DIAGNOSTIC
85130803Smarcel	/* Check that we called signotify() enough. */
86130803Smarcel	PROC_LOCK(p);
87130803Smarcel	mtx_lock_spin(&sched_lock);
88130803Smarcel	if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 ||
89130803Smarcel	    (td->td_flags & TDF_ASTPENDING) == 0))
90130803Smarcel		printf("failed to set signal flags properly for ast()\n");
91130803Smarcel	mtx_unlock_spin(&sched_lock);
92130803Smarcel	PROC_UNLOCK(p);
93130803Smarcel#endif
94130803Smarcel
95130803Smarcel#ifdef KTRACE
96130803Smarcel	KTRUSERRET(td);
97130803Smarcel#endif
98130803Smarcel
99130803Smarcel	/*
100130803Smarcel	 * If this thread tickled GEOM, we need to wait for the giggling to
101130803Smarcel	 * stop before we return to userland
102130803Smarcel	 */
103130803Smarcel	if (td->td_pflags & TDP_GEOM)
104130803Smarcel		g_waitidle();
105130803Smarcel
106130803Smarcel	/*
107130803Smarcel	 * We need to check to see if we have to exit or wait due to a
108130803Smarcel	 * single threading requirement or some other STOP condition.
109130803Smarcel	 * Don't bother doing all the work if the stop bits are not set
110130803Smarcel	 * at this time.. If we miss it, we miss it.. no big deal.
111130803Smarcel	 */
112130803Smarcel	if (P_SHOULDSTOP(p)) {
113130803Smarcel		PROC_LOCK(p);
114130803Smarcel		thread_suspend_check(0);	/* Can suspend or kill */
115130803Smarcel		PROC_UNLOCK(p);
116130803Smarcel	}
117130803Smarcel
118130803Smarcel#ifdef KSE
119130803Smarcel	/*
120130803Smarcel	 * Do special thread processing, e.g. upcall tweaking and such.
121130803Smarcel	 */
122130803Smarcel	if (p->p_flag & P_SA)
123130803Smarcel		thread_userret(td, frame);
124130803Smarcel#endif
125130803Smarcel
126130803Smarcel	/*
127130803Smarcel	 * Charge system time if profiling.
128130803Smarcel	 */
129130803Smarcel	if (p->p_flag & P_PROFIL) {
130130803Smarcel
131130803Smarcel		addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
132130803Smarcel	}
133130803Smarcel
134130803Smarcel	/*
135130803Smarcel	 * Let the scheduler adjust our priority etc.
136130803Smarcel	 */
137130803Smarcel	sched_userret(td);
138130803Smarcel	KASSERT(td->td_locks == 0,
139130803Smarcel	    ("userret: Returning with %d locks held.", td->td_locks));
140130803Smarcel}
141130803Smarcel
142130803Smarcel/*
143130803Smarcel * Process an asynchronous software trap.
144130803Smarcel * This is relatively easy.
145130803Smarcel * This function will return with preemption disabled.
146130803Smarcel */
147130803Smarcelvoid
148130803Smarcelast(struct trapframe *framep)
149130803Smarcel{
150130803Smarcel	struct thread *td;
151130803Smarcel	struct proc *p;
152130803Smarcel#ifdef KSE
153130803Smarcel	struct ksegrp *kg;
154130803Smarcel#endif
155130803Smarcel	struct rlimit rlim;
156130803Smarcel	int sflag;
157130803Smarcel	int flags;
158130803Smarcel	int sig;
159130803Smarcel#if defined(DEV_NPX) && !defined(SMP)
160130803Smarcel	int ucode;
161130803Smarcel	ksiginfo_t ksi;
162130803Smarcel#endif
163130803Smarcel
164130803Smarcel	td = curthread;
165130803Smarcel	p = td->td_proc;
166130803Smarcel#ifdef KSE
167130803Smarcel	kg = td->td_ksegrp;
168130803Smarcel#endif
169130803Smarcel
170130803Smarcel	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
171130803Smarcel            p->p_comm);
172130803Smarcel	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
173130803Smarcel	WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode");
174130803Smarcel	mtx_assert(&Giant, MA_NOTOWNED);
175130803Smarcel	mtx_assert(&sched_lock, MA_NOTOWNED);
176130803Smarcel	td->td_frame = framep;
177130803Smarcel	td->td_pticks = 0;
178130803Smarcel
179130803Smarcel#ifdef KSE
180130803Smarcel	if ((p->p_flag & P_SA) && (td->td_mailbox == NULL))
181130803Smarcel		thread_user_enter(td);
182130803Smarcel#endif
183130803Smarcel
184130803Smarcel	/*
185130803Smarcel	 * This updates the p_sflag's for the checks below in one
186130803Smarcel	 * "atomic" operation with turning off the astpending flag.
187130803Smarcel	 * If another AST is triggered while we are handling the
188130803Smarcel	 * AST's saved in sflag, the astpending flag will be set and
189130803Smarcel	 * ast() will be called again.
190130803Smarcel	 */
191130803Smarcel	mtx_lock_spin(&sched_lock);
192130803Smarcel	flags = td->td_flags;
193130803Smarcel	sflag = p->p_sflag;
194130803Smarcel	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND | PS_XCPU))
195130803Smarcel		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND | PS_XCPU);
196130803Smarcel#ifdef MAC
197130803Smarcel	if (p->p_sflag & PS_MACPEND)
198130803Smarcel		p->p_sflag &= ~PS_MACPEND;
199130803Smarcel#endif
200130803Smarcel	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK |
201130803Smarcel	    TDF_NEEDRESCHED | TDF_INTERRUPT);
202130803Smarcel	cnt.v_trap++;
203130803Smarcel	mtx_unlock_spin(&sched_lock);
204130803Smarcel
205130803Smarcel	/*
206130803Smarcel	 * XXXKSE While the fact that we owe a user profiling
207130803Smarcel	 * tick is stored per KSE in this code, the statistics
208130803Smarcel	 * themselves are still stored per process.
209130803Smarcel	 * This should probably change, by which I mean that
210130803Smarcel	 * possibly the location of both might change.
211130803Smarcel	 */
212130803Smarcel	if (td->td_ucred != p->p_ucred)
213130803Smarcel		cred_update_thread(td);
214130803Smarcel	if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) {
215130803Smarcel		addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
216130803Smarcel		td->td_profil_ticks = 0;
217130803Smarcel		td->td_pflags &= ~TDP_OWEUPC;
218130803Smarcel	}
219130803Smarcel	if (sflag & PS_ALRMPEND) {
220130803Smarcel		PROC_LOCK(p);
221130803Smarcel		psignal(p, SIGVTALRM);
222130803Smarcel		PROC_UNLOCK(p);
223130803Smarcel	}
224130803Smarcel#if defined(DEV_NPX) && !defined(SMP)
225130803Smarcel	if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
226130803Smarcel		atomic_clear_int(&PCPU_GET(curpcb)->pcb_flags,
227130803Smarcel		    PCB_NPXTRAP);
228130803Smarcel		ucode = npxtrap();
229130803Smarcel		if (ucode != -1) {
230130803Smarcel			ksiginfo_init_trap(&ksi);
231130803Smarcel			ksi.ksi_signo = SIGFPE;
232130803Smarcel			ksi.ksi_code = ucode;
233130803Smarcel			trapsignal(td, &ksi);
234130803Smarcel		}
235130803Smarcel	}
236130803Smarcel#endif
237130803Smarcel	if (sflag & PS_PROFPEND) {
238130803Smarcel		PROC_LOCK(p);
239130803Smarcel		psignal(p, SIGPROF);
240130803Smarcel		PROC_UNLOCK(p);
241130803Smarcel	}
242130803Smarcel	if (sflag & PS_XCPU) {
243130803Smarcel		PROC_LOCK(p);
244130803Smarcel		lim_rlimit(p, RLIMIT_CPU, &rlim);
245130803Smarcel		mtx_lock_spin(&sched_lock);
246130803Smarcel		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
247130803Smarcel			mtx_unlock_spin(&sched_lock);
248130803Smarcel			killproc(p, "exceeded maximum CPU limit");
249130803Smarcel		} else {
250130803Smarcel			if (p->p_cpulimit < rlim.rlim_max)
251130803Smarcel				p->p_cpulimit += 5;
252130803Smarcel			mtx_unlock_spin(&sched_lock);
253130803Smarcel			psignal(p, SIGXCPU);
254130803Smarcel		}
255130803Smarcel		PROC_UNLOCK(p);
256130803Smarcel	}
257#ifdef MAC
258	if (sflag & PS_MACPEND)
259		mac_thread_userret(td);
260#endif
261	if (flags & TDF_NEEDRESCHED) {
262#ifdef KTRACE
263		if (KTRPOINT(td, KTR_CSW))
264			ktrcsw(1, 1);
265#endif
266		mtx_lock_spin(&sched_lock);
267#ifdef KSE
268		sched_prio(td, kg->kg_user_pri);
269#else
270		sched_prio(td, td->td_user_pri);
271#endif
272		mi_switch(SW_INVOL, NULL);
273		mtx_unlock_spin(&sched_lock);
274#ifdef KTRACE
275		if (KTRPOINT(td, KTR_CSW))
276			ktrcsw(0, 1);
277#endif
278	}
279	if (flags & TDF_NEEDSIGCHK) {
280		PROC_LOCK(p);
281		mtx_lock(&p->p_sigacts->ps_mtx);
282		while ((sig = cursig(td)) != 0)
283			postsig(sig);
284		mtx_unlock(&p->p_sigacts->ps_mtx);
285		PROC_UNLOCK(p);
286	}
287
288	userret(td, framep);
289	mtx_assert(&Giant, MA_NOTOWNED);
290}
291