/*      $NetBSD: lwproc.c,v 1.12 2011/01/28 16:58:28 pooka Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lwproc.c,v 1.12 2011/01/28 16:58:28 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/uidinfo.h>

#include <rump/rumpuser.h>

#include "rump_private.h"

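/*
 * Tear down a process once its last lwp is gone: unlink it from the
 * proc lists, release its pid, pgrp, credentials, limits, stats and
 * locks, and finally free the proc memory itself.
 */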
static void
lwproc_proc_free(struct proc *p)
{
	kauth_cred_t cred;

	mutex_enter(proc_lock);

	KASSERT(p->p_nlwps == 0);
	KASSERT(LIST_EMPTY(&p->p_lwps));
	KASSERT(p->p_stat == SACTIVE || p->p_stat == SDYING ||
	    p->p_stat == SDEAD);

	LIST_REMOVE(p, p_list);
	LIST_REMOVE(p, p_sibling);
	proc_free_pid(p->p_pid); /* decrements nprocs */
	proc_leavepgrp(p); /* releases proc_lock */

	cred = p->p_cred;
	chgproccnt(kauth_cred_getuid(cred), -1);
	if (rump_proc_vfs_release)
		rump_proc_vfs_release(p);

	limfree(p->p_limit);
	pstatsfree(p->p_stats);
	kauth_cred_free(p->p_cred);
	proc_finispecific(p);

	mutex_obj_free(p->p_lock);
	mutex_destroy(&p->p_stmutex);
	mutex_destroy(&p->p_auxlock);
	rw_destroy(&p->p_reflock);
	cv_destroy(&p->p_waitcv);
	cv_destroy(&p->p_lwpcv);

	/* non-kernel vmspaces are not shared */
	if (!RUMP_LOCALPROC_P(p)) {
		KASSERT(p->p_vmspace->vm_refcnt == 1);
		kmem_free(p->p_vmspace, sizeof(*p->p_vmspace));
	}

	proc_free_mem(p);
}

/*
 * Allocate a new process.  Mostly mimic fork by
 * copying the properties of the parent.  However, there are some
 * differences.  For example, fd table handling is governed by the
 * caller's flags: the table may be copied, created anew, or shared.
 *
 * The caller is responsible for creating the initial lwp and
 * switching to it; only the new process is returned.
 */
static struct proc *
lwproc_newproc(struct proc *parent, int flags)
{
	uid_t uid = kauth_cred_getuid(parent->p_cred);
	struct proc *p;

	/* maxproc not enforced */
	atomic_inc_uint(&nprocs);

	/* allocate process */
	p = proc_alloc();
	memset(&p->p_startzero, 0,
	    offsetof(struct proc, p_endzero)
	      - offsetof(struct proc, p_startzero));
	memcpy(&p->p_startcopy, &parent->p_startcopy,
	    offsetof(struct proc, p_endcopy)
	      - offsetof(struct proc, p_startcopy));

	p->p_stats = pstatscopy(parent->p_stats);

	p->p_vmspace = vmspace_kernel();
	p->p_emul = &emul_netbsd;
	strcpy(p->p_comm, "rumproc");

	if ((flags & RUMP_RFCFDG) == 0)
		KASSERT(parent == curproc);
	if (flags & RUMP_RFFDG)
		p->p_fd = fd_copy();
	else if (flags & RUMP_RFCFDG)
		p->p_fd = fd_init(NULL);
	else
		fd_share(p);

	lim_addref(parent->p_limit);
	p->p_limit = parent->p_limit;

	LIST_INIT(&p->p_lwps);
	LIST_INIT(&p->p_children);

	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p->p_reflock);
	cv_init(&p->p_waitcv, "pwait");
	cv_init(&p->p_lwpcv, "plwp");

	p->p_pptr = parent;
	p->p_ppid = parent->p_pid;
	p->p_stat = SACTIVE;

	kauth_proc_fork(parent, p);

	/* initialize cwd in rump kernels with vfs */
	if (rump_proc_vfs_init)
		rump_proc_vfs_init(p);

	chgproccnt(uid, 1); /* not enforced */
	/* publish the proc on the various proc lists */
	mutex_enter(proc_lock);
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&parent->p_children, p, p_sibling);
	LIST_INSERT_AFTER(parent, p, p_pglist);
	mutex_exit(proc_lock);

	return p;
}

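/*
 * Free an exiting lwp: unlink it from its process and the global lwp
 * list and release its resources.  If it was the last lwp in the
 * process, mark the process dead and free that too.
 */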
static void
lwproc_freelwp(struct lwp *l)
{
	struct proc *p;
	bool freeproc;

	p = l->l_proc;
	mutex_enter(p->p_lock);

	/* XXX: l_refcnt */
	KASSERT(l->l_flag & LW_WEXIT);
	KASSERT(l->l_refcnt == 0);

	/* ok, zero references, continue with nuke */
	LIST_REMOVE(l, l_sibling);
	KASSERT(p->p_nlwps >= 1);
	if (--p->p_nlwps == 0) {
		KASSERT(p != &proc0);
		p->p_stat = SDEAD;
	}
	freeproc = p->p_nlwps == 0;
	cv_broadcast(&p->p_lwpcv); /* nobody sleeps on this in rump? */
	kauth_cred_free(l->l_cred);
	mutex_exit(p->p_lock);

	mutex_enter(proc_lock);
	LIST_REMOVE(l, l_list);
	mutex_exit(proc_lock);

	if (l->l_name)
		kmem_free(l->l_name, MAXCOMLEN);
	lwp_finispecific(l);

	kmem_free(l, sizeof(*l));

	/* use the value snapshotted under p_lock; the lwp is gone now */
	if (freeproc)
		lwproc_proc_free(p);
}


extern kmutex_t unruntime_lock;

/*
 * called with p_lock held, releases lock before return
 */
static void
lwproc_makelwp(struct proc *p, struct lwp *l, bool doswitch, bool procmake)
{

	p->p_nlwps++;
	l->l_refcnt = 1;
	l->l_proc = p;

	l->l_lid = p->p_nlwpid++;
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
	mutex_exit(p->p_lock);

	lwp_update_creds(l);

	l->l_fd = p->p_fd;
	l->l_cpu = rump_cpu;
	l->l_target_cpu = rump_cpu; /* Initial target CPU always the same */
	l->l_stat = LSRUN;
	l->l_mutex = &unruntime_lock;
	TAILQ_INIT(&l->l_ld_locks);

	lwp_initspecific(l);

	if (doswitch) {
		rump_lwproc_switch(l);
	}

	/* filedesc already has refcount 1 when process is created */
	if (!procmake) {
		fd_hold(l);
	}

	mutex_enter(proc_lock);
	LIST_INSERT_HEAD(&alllwp, l, l_list);
	mutex_exit(proc_lock);
}

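/*
 * Allocate a new lwp attached to process p, or to a fresh process
 * hanging off proc0 if p is NULL.  The lwp is returned without
 * switching to it.
 */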
struct lwp *
rump__lwproc_alloclwp(struct proc *p)
{
	struct lwp *l;
	bool newproc = false;

	if (p == NULL) {
		p = lwproc_newproc(&proc0, 0);
		newproc = true;
	}

	l = kmem_zalloc(sizeof(*l), KM_SLEEP);

	mutex_enter(p->p_lock);
	lwproc_makelwp(p, l, false, newproc);

	return l;
}

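/*
 * Create a new lwp in the process identified by pid and switch the
 * calling context to it.  Returns ESRCH if no such process exists.
 */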
int
rump_lwproc_newlwp(pid_t pid)
{
	struct proc *p;
	struct lwp *l;

	l = kmem_zalloc(sizeof(*l), KM_SLEEP);
	mutex_enter(proc_lock);
	p = proc_find_raw(pid);
	if (p == NULL) {
		mutex_exit(proc_lock);
		kmem_free(l, sizeof(*l));
		return ESRCH;
	}
	mutex_enter(p->p_lock);
	mutex_exit(proc_lock);
	lwproc_makelwp(p, l, true, false);

	return 0;
}

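/*
 * Fork off a new process from curproc and switch to a fresh lwp in
 * it.  The flags control the child's fd table: RUMP_RFFDG copies the
 * parent's table, RUMP_RFCFDG starts with a pristine one, and passing
 * neither shares the parent's table.  Passing both flags, or any
 * other flag, is an error (EINVAL).
 *
 * For example, a hypothetical caller wanting a child with a clean
 * fd table might do:
 *
 *	if ((error = rump_lwproc_rfork(RUMP_RFCFDG)) != 0)
 *		return error;
 */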
int
rump_lwproc_rfork(int flags)
{
	struct proc *p;
	struct lwp *l;

	if (flags & ~(RUMP_RFFDG|RUMP_RFCFDG) ||
	    (~flags & (RUMP_RFFDG|RUMP_RFCFDG)) == 0)
		return EINVAL;

	p = lwproc_newproc(curproc, flags);
	l = kmem_zalloc(sizeof(*l), KM_SLEEP);
	mutex_enter(p->p_lock);
	lwproc_makelwp(p, l, true, true);

	return 0;
}

/*
 * Switch to a new process/thread.  Release previous one if
 * deemed to be exiting.  This is considered a slow path for
 * rump kernel entry.
 */
void
rump_lwproc_switch(struct lwp *newlwp)
{
	struct lwp *l = curlwp;

	KASSERT(!(l->l_flag & LW_WEXIT) || newlwp);

	if (__predict_false(newlwp && (newlwp->l_pflag & LP_RUNNING)))
		panic("lwp %p (%d:%d) already running",
		    newlwp, newlwp->l_proc->p_pid, newlwp->l_lid);

	if (newlwp == NULL) {
		l->l_pflag &= ~LP_RUNNING;
		l->l_flag |= LW_RUMP_CLEAR;
		return;
	}

	/* fd_free() must be called from curlwp context.  talk about ugh */
	if (l->l_flag & LW_WEXIT) {
		fd_free();
	}

	rumpuser_set_curlwp(NULL);

	newlwp->l_cpu = newlwp->l_target_cpu = l->l_cpu;
	newlwp->l_mutex = l->l_mutex;
	newlwp->l_pflag |= LP_RUNNING;

	rumpuser_set_curlwp(newlwp);

	/*
	 * Check if the thread should get a signal.  This is
	 * mostly to satisfy the "record" rump sigmodel.
	 */
	mutex_enter(newlwp->l_proc->p_lock);
	if (sigispending(newlwp, 0)) {
		newlwp->l_flag |= LW_PENDSIG;
	}
	mutex_exit(newlwp->l_proc->p_lock);

	l->l_mutex = &unruntime_lock;
	l->l_cpu = NULL;
	l->l_pflag &= ~LP_RUNNING;
	l->l_flag &= ~LW_PENDSIG;

	if (l->l_flag & LW_WEXIT) {
		lwproc_freelwp(l);
	}
}

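/*
 * Drop the caller's reference to curlwp and mark the lwp for release.
 * The lwp itself is freed the next time it is unscheduled.
 */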
void
rump_lwproc_releaselwp(void)
{
	struct proc *p;
	struct lwp *l = curlwp;

	if (l->l_refcnt == 0 && l->l_flag & LW_WEXIT)
		panic("releasing non-pertinent lwp");

	p = l->l_proc;
	mutex_enter(p->p_lock);
	KASSERT(l->l_refcnt != 0);
	l->l_refcnt--;
	mutex_exit(p->p_lock);
	l->l_flag |= LW_WEXIT; /* will be released when unscheduled */
}

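/*
 * Return the current lwp, or NULL if it is already on its way out.
 */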
struct lwp *
rump_lwproc_curlwp(void)
{
	struct lwp *l = curlwp;

	if (l->l_flag & LW_WEXIT)
		return NULL;
	return l;
}