kern_fork.c: full view of the changes between r185647 and r191816
(deleted lines are prefixed with "-", added lines with "+")

--- head/sys/kern/kern_fork.c  (r185647)
+++ head/sys/kern/kern_fork.c  (r191816)
/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *      The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
 */

#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/kern/kern_fork.c 185647 2008-12-05 20:50:24Z kib $");
+__FBSDID("$FreeBSD: head/sys/kern/kern_fork.c 191816 2009-05-05 10:56:12Z zec $");

#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/pioctl.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/unistd.h>
#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/signalvar.h>
+#include <sys/vimage.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
dtrace_fork_func_t      dtrace_fasttrap_fork;
#endif

SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, kernel, , create);
SDT_PROBE_ARGTYPE(proc, kernel, , create, 0, "struct proc *");
SDT_PROBE_ARGTYPE(proc, kernel, , create, 1, "struct proc *");
SDT_PROBE_ARGTYPE(proc, kernel, , create, 2, "int");
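
/*
 * A note on the DTrace probe declared above: fork1() fires proc:::create on
 * success with arg0 = the new child (struct proc *), arg1 = the parent and
 * arg2 = the fork flags, matching the SDT_PROBE() call near the end of
 * fork1() below.
 */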

#ifndef _SYS_SYSPROTO_H_
struct fork_args {
        int     dummy;
};
#endif

/* ARGSUSED */
int
fork(td, uap)
        struct thread *td;
        struct fork_args *uap;
{
        int error;
        struct proc *p2;

        error = fork1(td, RFFDG | RFPROC, 0, &p2);
        if (error == 0) {
                td->td_retval[0] = p2->p_pid;
                td->td_retval[1] = 0;
        }
        return (error);
}

/* ARGSUSED */
int
vfork(td, uap)
        struct thread *td;
        struct vfork_args *uap;
{
        int error, flags;
        struct proc *p2;

#ifdef XEN
        flags = RFFDG | RFPROC; /* validate that this is still an issue */
#else
        flags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
#endif
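        /*
         * RFMEM shares the parent's address space with the child and
         * RFPPWAIT blocks the parent until the child execs or exits,
         * giving vfork() its traditional semantics; the XEN case falls
         * back to plain fork() behaviour.
         */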
        error = fork1(td, flags, 0, &p2);
        if (error == 0) {
                td->td_retval[0] = p2->p_pid;
                td->td_retval[1] = 0;
        }
        return (error);
}

int
rfork(td, uap)
        struct thread *td;
        struct rfork_args *uap;
{
        struct proc *p2;
        int error;

        /* Don't allow kernel-only flags. */
        if ((uap->flags & RFKERNELONLY) != 0)
                return (EINVAL);

        AUDIT_ARG(fflags, uap->flags);
        error = fork1(td, uap->flags, 0, &p2);
        if (error == 0) {
                td->td_retval[0] = p2 ? p2->p_pid : 0;
                td->td_retval[1] = 0;
        }
        return (error);
}

int     nprocs = 1;             /* process 0 */
int     lastpid = 0;
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
    "Last used PID");

/*
 * Random component to lastpid generation. We mix in a random factor to make
 * it a little harder to predict. We sanity check the modulus value to avoid
 * doing it in critical paths. Don't let it be too small or we pointlessly
 * waste randomness entropy, and don't let it be impossibly large. Using a
 * modulus that is too big causes a LOT more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
 */
static int randompid = 0;
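
/*
 * For example, with kern.randompid set to 100, the PID search in fork1()
 * below starts at lastpid + 1 + (arc4random() % 100) instead of
 * lastpid + 1, so consecutive forks no longer produce consecutive PIDs.
 */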

static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
        int error, pid;

        error = sysctl_wire_old_buffer(req, sizeof(int));
        if (error != 0)
                return (error);
        sx_xlock(&allproc_lock);
        pid = randompid;
        error = sysctl_handle_int(oidp, &pid, 0, req);
        if (error == 0 && req->newptr != NULL) {
                if (pid < 0 || pid > PID_MAX - 100)     /* out of range */
                        pid = PID_MAX - 100;
                else if (pid < 2)                       /* NOP */
                        pid = 0;
                else if (pid < 100)                     /* Make it reasonable */
                        pid = 100;
                randompid = pid;
        }
        sx_xunlock(&allproc_lock);
        return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
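
/*
 * The modulus is tunable at runtime, e.g. "sysctl kern.randompid=100";
 * the handler above clamps nonsense values rather than rejecting them.
 */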

int
fork1(td, flags, pages, procp)
        struct thread *td;
        int flags;
        int pages;
        struct proc **procp;
{
        struct proc *p1, *p2, *pptr;
        struct proc *newproc;
        int ok, trypid;
        static int curfail, pidchecked = 0;
        static struct timeval lastfail;
        struct filedesc *fd;
        struct filedesc_to_leader *fdtol;
        struct thread *td2;
        struct sigacts *newsigacts;
        struct vmspace *vm2;
        int error;

        /* Can't copy and clear. */
        if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
                return (EINVAL);

        p1 = td->td_proc;

        /*
         * Here we don't create a new process, but we divorce
         * certain parts of a process from itself.
         */
        if ((flags & RFPROC) == 0) {
                if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
                    (flags & (RFCFDG | RFFDG))) {
                        PROC_LOCK(p1);
                        if (thread_single(SINGLE_BOUNDARY)) {
                                PROC_UNLOCK(p1);
                                return (ERESTART);
                        }
                        PROC_UNLOCK(p1);
                }

                error = vm_forkproc(td, NULL, NULL, NULL, flags);
                if (error)
                        goto norfproc_fail;

                /*
                 * Close all file descriptors.
                 */
                if (flags & RFCFDG) {
                        struct filedesc *fdtmp;
                        fdtmp = fdinit(td->td_proc->p_fd);
                        fdfree(td);
                        p1->p_fd = fdtmp;
                }

                /*
                 * Unshare file descriptors (from parent).
                 */
                if (flags & RFFDG)
                        fdunshare(p1, td);

norfproc_fail:
                if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
                    (flags & (RFCFDG | RFFDG))) {
                        PROC_LOCK(p1);
                        thread_single_end();
                        PROC_UNLOCK(p1);
                }
                *procp = NULL;
                return (error);
        }

        /*
         * XXX
         * We did have single-threading code here, but it proved
         * unneeded and caused problems.
         */

        vm2 = NULL;
        /* Allocate new proc. */
        newproc = uma_zalloc(proc_zone, M_WAITOK);
        if (TAILQ_EMPTY(&newproc->p_threads)) {
                td2 = thread_alloc();
                if (td2 == NULL) {
                        error = ENOMEM;
                        goto fail1;
                }
                proc_linkup(newproc, td2);
        } else
                td2 = FIRST_THREAD_IN_PROC(newproc);

        /* Allocate and switch to an alternate kstack if specified. */
        if (pages != 0) {
                if (!vm_thread_new_altkstack(td2, pages)) {
                        error = ENOMEM;
                        goto fail1;
                }
        }
        if ((flags & RFMEM) == 0) {
                vm2 = vmspace_fork(p1->p_vmspace);
                if (vm2 == NULL) {
                        error = ENOMEM;
                        goto fail1;
                }
        }
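
        /*
         * When RFMEM is set (the vfork() path) no vmspace is allocated
         * here: the child will run in the parent's address space.
         * Otherwise vmspace_fork() builds a copy-on-write duplicate of
         * the parent's vmspace, which vm_forkproc() attaches to the
         * child further below.
         */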
#ifdef MAC
        mac_proc_init(newproc);
#endif
        knlist_init(&newproc->p_klist, &newproc->p_mtx, NULL, NULL, NULL);
        STAILQ_INIT(&newproc->p_ktr);

        /* We have to lock the process tree while we look for a pid. */
        sx_slock(&proctree_lock);

        /*
         * Although process entries are dynamically created, we still keep
         * a global limit on the maximum number we will create. Don't allow
         * a nonprivileged user to use the last ten processes; don't let root
         * exceed the limit. The variable nprocs is the current number of
         * processes, maxproc is the limit.
         */
        sx_xlock(&allproc_lock);
        if ((nprocs >= maxproc - 10 && priv_check_cred(td->td_ucred,
            PRIV_MAXPROC, 0) != 0) || nprocs >= maxproc) {
                error = EAGAIN;
                goto fail;
        }

        /*
         * Increment the count of procs running with this uid. Don't allow
         * a nonprivileged user to exceed their current limit.
         *
         * XXXRW: Can we avoid privilege here if it's not needed?
         */
        error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
        if (error == 0)
                ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
        else {
                PROC_LOCK(p1);
                ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
                    lim_cur(p1, RLIMIT_NPROC));
                PROC_UNLOCK(p1);
        }
        if (!ok) {
                error = EAGAIN;
                goto fail;
        }

        /*
         * Increment the nprocs resource before blocking can occur. There
         * are hard-limits as to the number of processes that can run.
         */
        nprocs++;

        /*
         * Find an unused process ID. We remember a range of unused IDs
         * ready to use (from lastpid+1 through pidchecked-1).
         *
         * If RFHIGHPID is set (used during system boot), do not allocate
         * low-numbered pids.
         */
        trypid = lastpid + 1;
        if (flags & RFHIGHPID) {
                if (trypid < 10)
                        trypid = 10;
        } else {
                if (randompid)
                        trypid += arc4random() % randompid;
        }
retry:
        /*
         * If the process ID prototype has wrapped around,
         * restart somewhat above 0, as the low-numbered procs
         * tend to include daemons that don't exit.
         */
        if (trypid >= PID_MAX) {
                trypid = trypid % PID_MAX;
                if (trypid < 100)
                        trypid += 100;
                pidchecked = 0;
        }
        if (trypid >= pidchecked) {
                int doingzomb = 0;

                pidchecked = PID_MAX;
                /*
                 * Scan the active and zombie procs to check whether this pid
                 * is in use. Remember the lowest pid that's greater
                 * than trypid, so we can avoid checking for a while.
                 */
                p2 = LIST_FIRST(&allproc);
again:
                for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
                        while (p2->p_pid == trypid ||
                            (p2->p_pgrp != NULL &&
                            (p2->p_pgrp->pg_id == trypid ||
                            (p2->p_session != NULL &&
                            p2->p_session->s_sid == trypid)))) {
                                trypid++;
                                if (trypid >= pidchecked)
                                        goto retry;
                        }
                        if (p2->p_pid > trypid && pidchecked > p2->p_pid)
                                pidchecked = p2->p_pid;
                        if (p2->p_pgrp != NULL) {
                                if (p2->p_pgrp->pg_id > trypid &&
                                    pidchecked > p2->p_pgrp->pg_id)
                                        pidchecked = p2->p_pgrp->pg_id;
                                if (p2->p_session != NULL &&
                                    p2->p_session->s_sid > trypid &&
                                    pidchecked > p2->p_session->s_sid)
                                        pidchecked = p2->p_session->s_sid;
                        }
                }
                if (!doingzomb) {
                        doingzomb = 1;
                        p2 = LIST_FIRST(&zombproc);
                        goto again;
                }
        }
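
        /*
         * At this point trypid itself is free, and pidchecked holds the
         * lowest pid, process-group id or session id in use that is
         * greater than trypid; every id in [trypid, pidchecked) is known
         * to be available, so the scan can be skipped on subsequent
         * forks until lastpid catches up with pidchecked.
         */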
        sx_sunlock(&proctree_lock);

        /*
         * RFHIGHPID does not mess with the lastpid counter during boot.
         */
        if (flags & RFHIGHPID)
                pidchecked = 0;
        else
                lastpid = trypid;

        p2 = newproc;
        p2->p_state = PRS_NEW;          /* protect against others */
        p2->p_pid = trypid;
        /*
         * Allow the scheduler to initialize the child.
         */
        thread_lock(td);
        sched_fork(td, td2);
        thread_unlock(td);
        AUDIT_ARG(pid, p2->p_pid);
        LIST_INSERT_HEAD(&allproc, p2, p_list);
        LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);

        PROC_LOCK(p2);
        PROC_LOCK(p1);

        sx_xunlock(&allproc_lock);

        bcopy(&p1->p_startcopy, &p2->p_startcopy,
            __rangeof(struct proc, p_startcopy, p_endcopy));
        pargs_hold(p2->p_args);
        PROC_UNLOCK(p1);

        bzero(&p2->p_startzero,
            __rangeof(struct proc, p_startzero, p_endzero));

        p2->p_ucred = crhold(td->td_ucred);

        /* In case we are jailed tell the prison that we exist. */
        if (jailed(p2->p_ucred))
                prison_proc_hold(p2->p_ucred->cr_prison);

        PROC_UNLOCK(p2);

        /*
         * Malloc things while we don't hold any locks.
         */
        if (flags & RFSIGSHARE)
                newsigacts = NULL;
        else
                newsigacts = sigacts_alloc();

        /*
         * Copy filedesc.
         */
        if (flags & RFCFDG) {
                fd = fdinit(p1->p_fd);
                fdtol = NULL;
        } else if (flags & RFFDG) {
                fd = fdcopy(p1->p_fd);
                fdtol = NULL;
        } else {
                fd = fdshare(p1->p_fd);
                if (p1->p_fdtol == NULL)
                        p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
                            p1->p_leader);
                if ((flags & RFTHREAD) != 0) {
                        /*
                         * Shared file descriptor table and
                         * shared process leaders.
                         */
                        fdtol = p1->p_fdtol;
                        FILEDESC_XLOCK(p1->p_fd);
                        fdtol->fdl_refcount++;
                        FILEDESC_XUNLOCK(p1->p_fd);
                } else {
                        /*
                         * Shared file descriptor table, and
                         * different process leaders.
                         */
                        fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
                            p1->p_fd, p2);
                }
        }
        /*
         * Make a proc table entry for the new process.
         * Start by zeroing the section of proc that is zero-initialized,
         * then copy the section that is copied directly from the parent.
         */

        PROC_LOCK(p2);
        PROC_LOCK(p1);

        bzero(&td2->td_startzero,
            __rangeof(struct thread, td_startzero, td_endzero));

        bcopy(&td->td_startcopy, &td2->td_startcopy,
            __rangeof(struct thread, td_startcopy, td_endcopy));

        bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
        td2->td_sigstk = td->td_sigstk;
        td2->td_sigmask = td->td_sigmask;
        td2->td_flags = TDF_INMEM;

+#ifdef VIMAGE
+        td2->td_vnet = NULL;
+        td2->td_vnet_lpush = NULL;
+#endif

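        /*
         * The VIMAGE additions above are the substance of r191816: with
         * options VIMAGE (virtualized network stacks), a new thread must
         * start with no vnet affinity of its own rather than inherit the
         * parent's. td_vnet is the thread's current vnet context;
         * td_vnet_lpush appears to record which function last set it (a
         * debugging aid), so both are cleared here.
         */
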
        /*
         * Duplicate sub-structures as needed.
         * Increase reference counts on shared objects.
         */
        p2->p_flag = P_INMEM;
        p2->p_swtick = ticks;
        if (p1->p_flag & P_PROFIL)
                startprofclock(p2);
        td2->td_ucred = crhold(p2->p_ucred);

        if (flags & RFSIGSHARE) {
                p2->p_sigacts = sigacts_hold(p1->p_sigacts);
        } else {
                sigacts_copy(newsigacts, p1->p_sigacts);
                p2->p_sigacts = newsigacts;
        }
        if (flags & RFLINUXTHPN)
                p2->p_sigparent = SIGUSR1;
        else
                p2->p_sigparent = SIGCHLD;
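
        /*
         * RFLINUXTHPN serves Linux clone() emulation, where a thread
         * notifies its parent of termination with a different signal
         * (SIGUSR1 here) than the native SIGCHLD.
         */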

        p2->p_textvp = p1->p_textvp;
        p2->p_fd = fd;
        p2->p_fdtol = fdtol;

        /*
         * p_limit is copy-on-write. Bump its refcount.
         */
        lim_fork(p1, p2);

        pstats_fork(p1->p_stats, p2->p_stats);

        PROC_UNLOCK(p1);
        PROC_UNLOCK(p2);

        /* Bump references to the text vnode (for procfs). */
        if (p2->p_textvp)
                vref(p2->p_textvp);

        /*
         * Set up linkage for kernel based threading.
         */
        if ((flags & RFTHREAD) != 0) {
                mtx_lock(&ppeers_lock);
                p2->p_peers = p1->p_peers;
                p1->p_peers = p2;
                p2->p_leader = p1->p_leader;
                mtx_unlock(&ppeers_lock);
                PROC_LOCK(p1->p_leader);
                if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
                        PROC_UNLOCK(p1->p_leader);
                        /*
                         * The task leader is exiting, so process p1 is
                         * going to be killed shortly. Since p1 obviously
                         * isn't dead yet, we know that the leader is either
                         * sending SIGKILLs to all the processes in this
                         * task or is sleeping waiting for all the peers to
                         * exit. We let p1 complete the fork, but we need
                         * to go ahead and kill the new process p2 since
                         * the task leader may not get a chance to send
                         * SIGKILL to it. We leave it on the list so that
                         * the task leader will wait for this new process
                         * to commit suicide.
                         */
                        PROC_LOCK(p2);
                        psignal(p2, SIGKILL);
                        PROC_UNLOCK(p2);
                } else
                        PROC_UNLOCK(p1->p_leader);
        } else {
                p2->p_peers = NULL;
                p2->p_leader = p2;
        }

        sx_xlock(&proctree_lock);
        PGRP_LOCK(p1->p_pgrp);
        PROC_LOCK(p2);
        PROC_LOCK(p1);

        /*
         * Preserve some more flags in subprocess. P_PROFIL has already
         * been preserved.
         */
        p2->p_flag |= p1->p_flag & P_SUGID;
        td2->td_pflags |= td->td_pflags & TDP_ALTSTACK;
        SESS_LOCK(p1->p_session);
        if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
                p2->p_flag |= P_CONTROLT;
        SESS_UNLOCK(p1->p_session);
        if (flags & RFPPWAIT)
                p2->p_flag |= P_PPWAIT;

        p2->p_pgrp = p1->p_pgrp;
        LIST_INSERT_AFTER(p1, p2, p_pglist);
        PGRP_UNLOCK(p1->p_pgrp);
        LIST_INIT(&p2->p_children);

        callout_init(&p2->p_itcallout, CALLOUT_MPSAFE);

#ifdef KTRACE
        /*
         * Copy traceflag and tracefile if enabled.
         */
        mtx_lock(&ktrace_mtx);
        KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
        if (p1->p_traceflag & KTRFAC_INHERIT) {
                p2->p_traceflag = p1->p_traceflag;
                if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
                        VREF(p2->p_tracevp);
                        KASSERT(p1->p_tracecred != NULL,
                            ("ktrace vnode with no cred"));
                        p2->p_tracecred = crhold(p1->p_tracecred);
                }
        }
        mtx_unlock(&ktrace_mtx);
#endif

        /*
         * If PF_FORK is set, the child process inherits the
         * procfs ioctl flags from its parent.
         */
        if (p1->p_pfsflags & PF_FORK) {
                p2->p_stops = p1->p_stops;
                p2->p_pfsflags = p1->p_pfsflags;
        }

#ifdef KDTRACE_HOOKS
        /*
         * Tell the DTrace fasttrap provider about the new process
         * if it has registered an interest.
         */
        if (dtrace_fasttrap_fork)
                dtrace_fasttrap_fork(p1, p2);
#endif

        /*
         * This begins the section where we must prevent the parent
         * from being swapped.
         */
        _PHOLD(p1);
        PROC_UNLOCK(p1);

        /*
         * Attach the new process to its parent.
         *
         * If RFNOWAIT is set, the newly created process becomes a child
         * of init. This effectively disassociates the child from the
         * parent.
         */
        if (flags & RFNOWAIT)
                pptr = initproc;
        else
                pptr = p1;
        p2->p_pptr = pptr;
        LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
        sx_xunlock(&proctree_lock);

        /* Inform accounting that we have forked. */
        p2->p_acflag = AFORK;
        PROC_UNLOCK(p2);

        /*
         * Finish creating the child process. It will return via a different
         * execution path later. (i.e., directly into user mode)
         */
        vm_forkproc(td, p2, td2, vm2, flags);

        if (flags == (RFFDG | RFPROC)) {
                PCPU_INC(cnt.v_forks);
                PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize);
        } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
                PCPU_INC(cnt.v_vforks);
                PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize);
        } else if (p1 == &proc0) {
                PCPU_INC(cnt.v_kthreads);
                PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize);
        } else {
                PCPU_INC(cnt.v_rforks);
                PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize);
        }
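
        /*
         * The exact flag patterns above distinguish the canonical
         * callers for the VM statistics: fork() passes RFFDG | RFPROC,
         * vfork() adds RFPPWAIT | RFMEM, kernel threads are forked from
         * proc0, and anything else is counted as an rfork().
         */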

        /*
         * Both processes are set up, now check if any loadable modules want
         * to adjust anything.
         *   What if they have an error? XXX
         */
        EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);

        /*
         * Set the child start time and mark the process as being complete.
         */
        microuptime(&p2->p_stats->p_start);
        PROC_SLOCK(p2);
        p2->p_state = PRS_NORMAL;
        PROC_SUNLOCK(p2);

        /*
         * If RFSTOPPED not requested, make child runnable and add to
         * run queue.
         */
        if ((flags & RFSTOPPED) == 0) {
                thread_lock(td2);
                TD_SET_CAN_RUN(td2);
                sched_add(td2, SRQ_BORING);
                thread_unlock(td2);
        }

        /*
         * Now can be swapped.
         */
        PROC_LOCK(p1);
        _PRELE(p1);
        PROC_UNLOCK(p1);

        /*
         * Tell any interested parties about the new process.
         */
        knote_fork(&p1->p_klist, p2->p_pid);
        SDT_PROBE(proc, kernel, , create, p2, p1, flags, 0, 0);

        /*
         * Preserve synchronization semantics of vfork. If waiting for
         * child to exec or exit, set P_PPWAIT on child, and sleep on our
         * proc (in case of exit).
         */
        PROC_LOCK(p2);
        while (p2->p_flag & P_PPWAIT)
                cv_wait(&p2->p_pwait, &p2->p_mtx);
        PROC_UNLOCK(p2);
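
        /*
         * The child clears P_PPWAIT and broadcasts the p_pwait condition
         * variable when it execs or exits, at which point the vfork()ing
         * parent wakes up here and is allowed to proceed.
         */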

        /*
         * Return child proc pointer to parent.
         */
        *procp = p2;
        return (0);
fail:
        sx_sunlock(&proctree_lock);
        if (ppsratecheck(&lastfail, &curfail, 1))
                printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
                    td->td_ucred->cr_ruid);
        sx_xunlock(&allproc_lock);
#ifdef MAC
        mac_proc_destroy(newproc);
#endif
fail1:
        if (vm2 != NULL)
                vmspace_free(vm2);
        uma_zfree(proc_zone, newproc);
        pause("fork", hz / 2);
        return (error);
}
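
/*
 * fork1() is also the backend for in-kernel process creation; for
 * instance, kproc_create() in kern_kthread.c passes RFSTOPPED (among
 * other flags) so that it can finish setting the new kernel process up
 * before scheduling it by hand. (Mentioned here for orientation; the
 * exact flag set lives with the callers.)
 */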

/*
 * Handle the return of a child process from fork1(). This function
 * is called from the MD fork_trampoline() entry point.
 */
void
fork_exit(callout, arg, frame)
        void (*callout)(void *, struct trapframe *);
        void *arg;
        struct trapframe *frame;
{
        struct proc *p;
        struct thread *td;
        struct thread *dtd;

        td = curthread;
        p = td->td_proc;
        KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));

        CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)",
            td, td->td_sched, p->p_pid, td->td_name);

        sched_fork_exit(td);
        /*
         * Processes normally resume in mi_switch() after being
         * cpu_switch()'ed to, but when children start up they arrive here
         * instead, so we must do much the same things as mi_switch() would.
         */
        if ((dtd = PCPU_GET(deadthread))) {
                PCPU_SET(deadthread, NULL);
                thread_stash(dtd);
        }
        thread_unlock(td);

        /*
         * cpu_set_fork_handler intercepts this function call to
         * have this call a non-return function to stay in kernel mode.
         * initproc has its own fork handler, but it does return.
         */
        KASSERT(callout != NULL, ("NULL callout in fork_exit"));
        callout(arg, frame);

        /*
         * Check if a kernel thread misbehaved and returned from its main
         * function.
         */
        if (p->p_flag & P_KTHREAD) {
                printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
                    td->td_name, p->p_pid);
                kproc_exit(0);
        }
        mtx_assert(&Giant, MA_NOTOWNED);

        EVENTHANDLER_INVOKE(schedtail, p);
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode. Giant is not held on entry, and must not
 * be held on return. This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(td, frame)
        struct thread *td;
        struct trapframe *frame;
{

        userret(td, frame);
#ifdef KTRACE
        if (KTRPOINT(td, KTR_SYSRET))
                ktrsysret(SYS_fork, 0, 0);
#endif
        mtx_assert(&Giant, MA_NOTOWNED);
}