lwproc.c revision 1.25
1254721Semaste/*      $NetBSD: lwproc.c,v 1.25 2013/12/09 16:21:15 pooka Exp $	*/
2254721Semaste
3254721Semaste/*
4254721Semaste * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5254721Semaste *
6254721Semaste * Redistribution and use in source and binary forms, with or without
7254721Semaste * modification, are permitted provided that the following conditions
8254721Semaste * are met:
9254721Semaste * 1. Redistributions of source code must retain the above copyright
10254721Semaste *    notice, this list of conditions and the following disclaimer.
11254721Semaste * 2. Redistributions in binary form must reproduce the above copyright
12254721Semaste *    notice, this list of conditions and the following disclaimer in the
13296417Sdim *    documentation and/or other materials provided with the distribution.
14254721Semaste *
15254721Semaste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16296417Sdim * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17296417Sdim * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18296417Sdim * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19296417Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20296417Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21296417Sdim * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22296417Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23254721Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24254721Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25288943Sdim * SUCH DAMAGE.
26262528Semaste */
27254721Semaste
28254721Semaste#include <sys/cdefs.h>
29254721Semaste__KERNEL_RCSID(0, "$NetBSD: lwproc.c,v 1.25 2013/12/09 16:21:15 pooka Exp $");
30254721Semaste
31280031Sdim#include <sys/param.h>
32254721Semaste#include <sys/atomic.h>
33254721Semaste#include <sys/filedesc.h>
34254721Semaste#include <sys/kauth.h>
35254721Semaste#include <sys/kmem.h>
36276479Sdim#include <sys/lwp.h>
37276479Sdim#include <sys/ktrace.h>
38276479Sdim#include <sys/pool.h>
39276479Sdim#include <sys/proc.h>
40276479Sdim#include <sys/queue.h>
41296417Sdim#include <sys/resourcevar.h>
42296417Sdim#include <sys/uidinfo.h>
43276479Sdim
44254721Semaste#include <rump/rumpuser.h>
45254721Semaste
46254721Semaste#include "rump_private.h"
47254721Semaste
/* Emulation assigned to p_emul of every process made by lwproc_newproc(). */
struct emul *emul_default = &emul_netbsd;
49254721Semaste
50254721Semastestatic void
51254721Semastelwproc_proc_free(struct proc *p)
52254721Semaste{
53254721Semaste	kauth_cred_t cred;
54254721Semaste
55254721Semaste	KASSERT(p->p_stat == SDYING || p->p_stat == SDEAD);
56254721Semaste
57254721Semaste#ifdef KTRACE
58254721Semaste	if (p->p_tracep) {
59254721Semaste		mutex_enter(&ktrace_lock);
60254721Semaste		ktrderef(p);
61254721Semaste		mutex_exit(&ktrace_lock);
62296417Sdim	}
63254721Semaste#endif
64254721Semaste
65296417Sdim	mutex_enter(proc_lock);
66254721Semaste
67254721Semaste	KASSERT(p->p_nlwps == 0);
68254721Semaste	KASSERT(LIST_EMPTY(&p->p_lwps));
69254721Semaste
70254721Semaste	LIST_REMOVE(p, p_list);
71254721Semaste	LIST_REMOVE(p, p_sibling);
72254721Semaste	proc_free_pid(p->p_pid); /* decrements nprocs */
73254721Semaste	proc_leavepgrp(p); /* releases proc_lock */
74288943Sdim
75254721Semaste	cred = p->p_cred;
76288943Sdim	chgproccnt(kauth_cred_getuid(cred), -1);
77288943Sdim	if (rump_proc_vfs_release)
78254721Semaste		rump_proc_vfs_release(p);
79254721Semaste
80254721Semaste	lim_free(p->p_limit);
81254721Semaste	pstatsfree(p->p_stats);
82254721Semaste	kauth_cred_free(p->p_cred);
83254721Semaste	proc_finispecific(p);
84254721Semaste
85254721Semaste	mutex_obj_free(p->p_lock);
86254721Semaste	mutex_destroy(&p->p_stmutex);
87254721Semaste	mutex_destroy(&p->p_auxlock);
88296417Sdim	rw_destroy(&p->p_reflock);
89296417Sdim	cv_destroy(&p->p_waitcv);
90254721Semaste	cv_destroy(&p->p_lwpcv);
91296417Sdim
92296417Sdim	/* non-kernel vmspaces are not shared */
93296417Sdim	if (!RUMP_LOCALPROC_P(p)) {
94296417Sdim		KASSERT(p->p_vmspace->vm_refcnt == 1);
95296417Sdim		kmem_free(p->p_vmspace, sizeof(*p->p_vmspace));
96296417Sdim	}
97296417Sdim
98296417Sdim	proc_free_mem(p);
99296417Sdim}
100296417Sdim
101296417Sdim/*
102296417Sdim * Allocate a new process.  Mostly mimic fork by
103296417Sdim * copying the properties of the parent.  However, there are some
104296417Sdim * differences.
105296417Sdim *
106296417Sdim * Switch to the new lwp and return a pointer to it.
107296417Sdim */
108254721Semastestatic struct proc *
109254721Semastelwproc_newproc(struct proc *parent, int flags)
110254721Semaste{
111254721Semaste	uid_t uid = kauth_cred_getuid(parent->p_cred);
112254721Semaste	struct proc *p;
113254721Semaste
114254721Semaste	/* maxproc not enforced */
115254721Semaste	atomic_inc_uint(&nprocs);
116262528Semaste
117254721Semaste	/* allocate process */
118254721Semaste	p = proc_alloc();
119262528Semaste	memset(&p->p_startzero, 0,
120254721Semaste	    offsetof(struct proc, p_endzero)
121254721Semaste	      - offsetof(struct proc, p_startzero));
122262528Semaste	memcpy(&p->p_startcopy, &parent->p_startcopy,
123254721Semaste	    offsetof(struct proc, p_endcopy)
124254721Semaste	      - offsetof(struct proc, p_startcopy));
125262528Semaste
126254721Semaste	/* some other garbage we need to zero */
127254721Semaste	p->p_sigacts = NULL;
128262528Semaste	p->p_aio = NULL;
129254721Semaste	p->p_dtrace = NULL;
130254721Semaste	p->p_mqueue_cnt = p->p_exitsig = 0;
131262528Semaste	p->p_flag = p->p_sflag = p->p_slflag = p->p_lflag = p->p_stflag = 0;
132254721Semaste	p->p_trace_enabled = 0;
133262528Semaste	p->p_xstat = p->p_acflag = 0;
134254721Semaste	p->p_stackbase = 0;
135254721Semaste
136254721Semaste	p->p_stats = pstatscopy(parent->p_stats);
137254721Semaste
138254721Semaste	p->p_vmspace = vmspace_kernel();
139254721Semaste	p->p_emul = emul_default;
140254721Semaste	if (*parent->p_comm)
141254721Semaste		strcpy(p->p_comm, parent->p_comm);
142254721Semaste	else
143254721Semaste		strcpy(p->p_comm, "rumproc");
144254721Semaste
145254721Semaste	if ((flags & RUMP_RFCFDG) == 0)
146254721Semaste		KASSERT(parent == curproc);
147254721Semaste	if (flags & RUMP_RFFDG)
148254721Semaste		p->p_fd = fd_copy();
149254721Semaste	else if (flags & RUMP_RFCFDG)
150254721Semaste		p->p_fd = fd_init(NULL);
151254721Semaste	else
152254721Semaste		fd_share(p);
153254721Semaste
154254721Semaste	lim_addref(parent->p_limit);
155254721Semaste	p->p_limit = parent->p_limit;
156254721Semaste
157254721Semaste	LIST_INIT(&p->p_lwps);
158254721Semaste	LIST_INIT(&p->p_children);
159254721Semaste
160254721Semaste	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
161254721Semaste	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
162254721Semaste	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
163254721Semaste	rw_init(&p->p_reflock);
164254721Semaste	cv_init(&p->p_waitcv, "pwait");
165254721Semaste	cv_init(&p->p_lwpcv, "plwp");
166254721Semaste
167254721Semaste	p->p_pptr = parent;
168254721Semaste	p->p_ppid = parent->p_pid;
169254721Semaste	p->p_stat = SACTIVE;
170254721Semaste
171254721Semaste	kauth_proc_fork(parent, p);
172254721Semaste
173254721Semaste	/* initialize cwd in rump kernels with vfs */
174254721Semaste	if (rump_proc_vfs_init)
175254721Semaste		rump_proc_vfs_init(p);
176254721Semaste
177254721Semaste	chgproccnt(uid, 1); /* not enforced */
178254721Semaste
179254721Semaste	/* publish proc various proc lists */
180254721Semaste	mutex_enter(proc_lock);
181254721Semaste	LIST_INSERT_HEAD(&allproc, p, p_list);
182254721Semaste	LIST_INSERT_HEAD(&parent->p_children, p, p_sibling);
183254721Semaste	LIST_INSERT_AFTER(parent, p, p_pglist);
184254721Semaste	mutex_exit(proc_lock);
185254721Semaste
186254721Semaste	return p;
187254721Semaste}
188254721Semaste
189254721Semastestatic void
190254721Semastelwproc_freelwp(struct lwp *l)
191254721Semaste{
192254721Semaste	struct proc *p;
193254721Semaste
194254721Semaste	p = l->l_proc;
195254721Semaste	mutex_enter(p->p_lock);
196254721Semaste
197254721Semaste	KASSERT(l->l_flag & LW_WEXIT);
198254721Semaste	KASSERT(l->l_refcnt == 0);
199254721Semaste
200254721Semaste	/* ok, zero references, continue with nuke */
201254721Semaste	LIST_REMOVE(l, l_sibling);
202254721Semaste	KASSERT(p->p_nlwps >= 1);
203254721Semaste	if (--p->p_nlwps == 0) {
204254721Semaste		KASSERT(p != &proc0);
205254721Semaste		p->p_stat = SDEAD;
206254721Semaste	}
207254721Semaste	cv_broadcast(&p->p_lwpcv); /* nobody sleeps on this in a rump kernel? */
208254721Semaste	kauth_cred_free(l->l_cred);
209254721Semaste	mutex_exit(p->p_lock);
210254721Semaste
211262528Semaste	mutex_enter(proc_lock);
212262528Semaste	LIST_REMOVE(l, l_list);
213262528Semaste	mutex_exit(proc_lock);
214262528Semaste
215262528Semaste	if (l->l_name)
216254721Semaste		kmem_free(l->l_name, MAXCOMLEN);
217262528Semaste	lwp_finispecific(l);
218262528Semaste
219262528Semaste	rumpuser_curlwpop(RUMPUSER_LWP_DESTROY, l);
220254721Semaste	membar_exit();
221254721Semaste	kmem_free(l, sizeof(*l));
222262528Semaste
223254721Semaste	if (p->p_stat == SDEAD)
224262528Semaste		lwproc_proc_free(p);
225262528Semaste}
226262528Semaste
227262528Semasteextern kmutex_t unruntime_lock;
228254721Semaste
229262528Semaste/*
230262528Semaste * called with p_lock held, releases lock before return
231262528Semaste */
232262528Semastestatic void
233296417Sdimlwproc_makelwp(struct proc *p, struct lwp *l, bool doswitch, bool procmake)
234296417Sdim{
235296417Sdim
236296417Sdim	p->p_nlwps++;
237254721Semaste	l->l_refcnt = 1;
238288943Sdim	l->l_proc = p;
239288943Sdim
240288943Sdim	l->l_lid = p->p_nlwpid++;
241262528Semaste	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
242262528Semaste
243262528Semaste	l->l_fd = p->p_fd;
244288943Sdim	l->l_cpu = rump_cpu;
245288943Sdim	l->l_target_cpu = rump_cpu; /* Initial target CPU always the same */
246254721Semaste	l->l_stat = LSRUN;
247288943Sdim	l->l_mutex = &unruntime_lock;
248288943Sdim	TAILQ_INIT(&l->l_ld_locks);
249254721Semaste	mutex_exit(p->p_lock);
250254721Semaste
251262528Semaste	lwp_update_creds(l);
252254721Semaste	lwp_initspecific(l);
253254721Semaste
254254721Semaste	membar_enter();
255254721Semaste	rumpuser_curlwpop(RUMPUSER_LWP_CREATE, l);
256254721Semaste	if (doswitch) {
257254721Semaste		rump_lwproc_switch(l);
258254721Semaste	}
259254721Semaste
260254721Semaste	/* filedesc already has refcount 1 when process is created */
261254721Semaste	if (!procmake) {
262254721Semaste		fd_hold(l);
263254721Semaste	}
264254721Semaste
265254721Semaste	mutex_enter(proc_lock);
266254721Semaste	LIST_INSERT_HEAD(&alllwp, l, l_list);
267254721Semaste	mutex_exit(proc_lock);
268254721Semaste}
269254721Semaste
270254721Semastestruct lwp *
271296417Sdimrump__lwproc_alloclwp(struct proc *p)
272254721Semaste{
273254721Semaste	struct lwp *l;
274254721Semaste	bool newproc = false;
275254721Semaste
276296417Sdim	if (p == NULL) {
277296417Sdim		p = lwproc_newproc(&proc0, 0);
278296417Sdim		newproc = true;
279296417Sdim	}
280296417Sdim
281254721Semaste	l = kmem_zalloc(sizeof(*l), KM_SLEEP);
282254721Semaste
283254721Semaste	mutex_enter(p->p_lock);
284280031Sdim	KASSERT((p->p_sflag & PS_RUMP_LWPEXIT) == 0);
285288943Sdim	lwproc_makelwp(p, l, false, newproc);
286280031Sdim
287280031Sdim	return l;
288288943Sdim}
289254721Semaste
290280031Sdimint
291288943Sdimrump_lwproc_newlwp(pid_t pid)
292254721Semaste{
293254721Semaste	struct proc *p;
294254721Semaste	struct lwp *l;
295254721Semaste
296254721Semaste	l = kmem_zalloc(sizeof(*l), KM_SLEEP);
297254721Semaste	mutex_enter(proc_lock);
298254721Semaste	p = proc_find_raw(pid);
299254721Semaste	if (p == NULL) {
300254721Semaste		mutex_exit(proc_lock);
301254721Semaste		kmem_free(l, sizeof(*l));
302254721Semaste		return ESRCH;
303254721Semaste	}
304254721Semaste	mutex_enter(p->p_lock);
305254721Semaste	if (p->p_sflag & PS_RUMP_LWPEXIT) {
306254721Semaste		mutex_exit(proc_lock);
307254721Semaste		mutex_exit(p->p_lock);
308254721Semaste		kmem_free(l, sizeof(*l));
309254721Semaste		return EBUSY;
310254721Semaste	}
311254721Semaste	mutex_exit(proc_lock);
312254721Semaste	lwproc_makelwp(p, l, true, false);
313254721Semaste
314254721Semaste	return 0;
315254721Semaste}
316254721Semaste
317254721Semasteint
318254721Semasterump_lwproc_rfork(int flags)
319254721Semaste{
320254721Semaste	struct proc *p;
321254721Semaste	struct lwp *l;
322254721Semaste
323254721Semaste	if (flags & ~(RUMP_RFFDG|RUMP_RFCFDG) ||
324254721Semaste	    (~flags & (RUMP_RFFDG|RUMP_RFCFDG)) == 0)
325254721Semaste		return EINVAL;
326254721Semaste
327254721Semaste	p = lwproc_newproc(curproc, flags);
328254721Semaste	l = kmem_zalloc(sizeof(*l), KM_SLEEP);
329254721Semaste	mutex_enter(p->p_lock);
330254721Semaste	KASSERT((p->p_sflag & PS_RUMP_LWPEXIT) == 0);
331254721Semaste	lwproc_makelwp(p, l, true, true);
332254721Semaste
333254721Semaste	return 0;
334258054Semaste}
335258054Semaste
336258054Semaste/*
337296417Sdim * Switch to a new process/thread.  Release previous one if
338296417Sdim * deemed to be exiting.  This is considered a slow path for
339296417Sdim * rump kernel entry.
340296417Sdim */
341296417Sdimvoid
342296417Sdimrump_lwproc_switch(struct lwp *newlwp)
343296417Sdim{
344296417Sdim	struct lwp *l = curlwp;
345296417Sdim
346296417Sdim	KASSERT(!(l->l_flag & LW_WEXIT) || newlwp);
347296417Sdim
348296417Sdim	if (__predict_false(newlwp && (newlwp->l_pflag & LP_RUNNING)))
349296417Sdim		panic("lwp %p (%d:%d) already running",
350296417Sdim		    newlwp, newlwp->l_proc->p_pid, newlwp->l_lid);
351296417Sdim
352296417Sdim	if (newlwp == NULL) {
353296417Sdim		l->l_pflag &= ~LP_RUNNING;
354296417Sdim		l->l_flag |= LW_RUMP_CLEAR;
355280031Sdim		return;
356280031Sdim	}
357280031Sdim
358254721Semaste	/* fd_free() must be called from curlwp context.  talk about ugh */
359254721Semaste	if (l->l_flag & LW_WEXIT) {
360254721Semaste		fd_free();
361254721Semaste	}
362254721Semaste
363254721Semaste	KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks);
364254721Semaste	rumpuser_curlwpop(RUMPUSER_LWP_CLEAR, l);
365258884Semaste
366254721Semaste	newlwp->l_cpu = newlwp->l_target_cpu = l->l_cpu;
367254721Semaste	newlwp->l_mutex = l->l_mutex;
368254721Semaste	newlwp->l_pflag |= LP_RUNNING;
369262528Semaste
370288943Sdim	rumpuser_curlwpop(RUMPUSER_LWP_SET, newlwp);
371262528Semaste	curcpu()->ci_curlwp = newlwp;
372262528Semaste	KERNEL_LOCK(newlwp->l_biglocks, NULL);
373262528Semaste
374262528Semaste	/*
375262528Semaste	 * Check if the thread should get a signal.  This is
376262528Semaste	 * mostly to satisfy the "record" rump sigmodel.
377262528Semaste	 */
378262528Semaste	mutex_enter(newlwp->l_proc->p_lock);
379262528Semaste	if (sigispending(newlwp, 0)) {
380276479Sdim		newlwp->l_flag |= LW_PENDSIG;
381276479Sdim	}
382276479Sdim	mutex_exit(newlwp->l_proc->p_lock);
383276479Sdim
384280031Sdim	l->l_mutex = &unruntime_lock;
385276479Sdim	l->l_pflag &= ~LP_RUNNING;
386296417Sdim	l->l_flag &= ~LW_PENDSIG;
387296417Sdim	l->l_stat = LSRUN;
388296417Sdim
389276479Sdim	if (l->l_flag & LW_WEXIT) {
390280031Sdim		lwproc_freelwp(l);
391280031Sdim	}
392280031Sdim}
393280031Sdim
394280031Sdim/*
395254721Semaste * Mark the current thread to be released upon return from
396262528Semaste * kernel.
397296417Sdim */
398254721Semastevoid
399262528Semasterump_lwproc_releaselwp(void)
400262528Semaste{
401262528Semaste	struct lwp *l = curlwp;
402254721Semaste
403262528Semaste	if (l->l_refcnt == 0 || l->l_flag & LW_WEXIT)
404254721Semaste		panic("releasing non-pertinent lwp");
405262528Semaste
406262528Semaste	rump__lwproc_lwprele();
407262528Semaste	KASSERT(l->l_refcnt == 0 && (l->l_flag & LW_WEXIT));
408254721Semaste}
409296417Sdim
410296417Sdim/*
411296417Sdim * In-kernel routines used to add and remove references for the
412262528Semaste * current thread.  The main purpose is to make it possible for
413254721Semaste * implicit threads to persist over scheduling operations in
414262528Semaste * rump kernel drivers.  Note that we don't need p_lock in a
415262528Semaste * rump kernel, since we do refcounting only for curlwp.
416262528Semaste */
417296417Sdimvoid
418296417Sdimrump__lwproc_lwphold(void)
419296417Sdim{
420262528Semaste	struct lwp *l = curlwp;
421262528Semaste
422262528Semaste	l->l_refcnt++;
423262528Semaste	l->l_flag &= ~LW_WEXIT;
424262528Semaste}
425262528Semaste
426262528Semastevoid
427262528Semasterump__lwproc_lwprele(void)
428262528Semaste{
429262528Semaste	struct lwp *l = curlwp;
430262528Semaste
431262528Semaste	l->l_refcnt--;
432262528Semaste	if (l->l_refcnt == 0)
433262528Semaste		l->l_flag |= LW_WEXIT;
434262528Semaste}
435262528Semaste
436262528Semastestruct lwp *
437262528Semasterump_lwproc_curlwp(void)
438262528Semaste{
439262528Semaste	struct lwp *l = curlwp;
440262528Semaste
441254721Semaste	if (l->l_flag & LW_WEXIT)
442254721Semaste		return NULL;
443254721Semaste	return l;
444254721Semaste}
445254721Semaste
446280031Sdim/* this interface is under construction (like the proverbial 90's web page) */
447280031Sdimint rump_i_know_what_i_am_doing_with_sysents = 0;
448280031Sdimvoid
449280031Sdimrump_lwproc_sysent_usenative()
450262528Semaste{
451262528Semaste
452262528Semaste	if (!rump_i_know_what_i_am_doing_with_sysents)
453254721Semaste		panic("don't use rump_lwproc_sysent_usenative()");
454254721Semaste	curproc->p_emul = &emul_netbsd;
455280031Sdim}
456254721Semaste