init_main.c revision 206142
1109864Sjeff/*-
2113357Sjeff * Copyright (c) 1995 Terrence R. Lambert
3109864Sjeff * All rights reserved.
4109864Sjeff *
5109864Sjeff * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6109864Sjeff *	The Regents of the University of California.  All rights reserved.
7109864Sjeff * (c) UNIX System Laboratories, Inc.
8109864Sjeff * All or some portions of this file are derived from material licensed
9109864Sjeff * to the University of California by American Telephone and Telegraph
10109864Sjeff * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11109864Sjeff * the permission of UNIX System Laboratories, Inc.
12109864Sjeff *
13109864Sjeff * Redistribution and use in source and binary forms, with or without
14109864Sjeff * modification, are permitted provided that the following conditions
15109864Sjeff * are met:
16109864Sjeff * 1. Redistributions of source code must retain the above copyright
17109864Sjeff *    notice, this list of conditions and the following disclaimer.
18109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright
19109864Sjeff *    notice, this list of conditions and the following disclaimer in the
20109864Sjeff *    documentation and/or other materials provided with the distribution.
21109864Sjeff * 3. All advertising materials mentioning features or use of this software
22109864Sjeff *    must display the following acknowledgement:
23109864Sjeff *	This product includes software developed by the University of
24109864Sjeff *	California, Berkeley and its contributors.
25109864Sjeff * 4. Neither the name of the University nor the names of its contributors
26109864Sjeff *    may be used to endorse or promote products derived from this software
27116182Sobrien *    without specific prior written permission.
28116182Sobrien *
29116182Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30109864Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31109864Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32109864Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33109864Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34109864Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35109864Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36109864Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37112966Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38122038Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39109864Sjeff * SUCH DAMAGE.
40109864Sjeff *
41109864Sjeff *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42109864Sjeff */
43109864Sjeff
44109864Sjeff#include <sys/cdefs.h>
45109864Sjeff__FBSDID("$FreeBSD: head/sys/kern/init_main.c 206142 2010-04-03 19:07:05Z alc $");
46109864Sjeff
47109864Sjeff#include "opt_ddb.h"
48109864Sjeff#include "opt_init_path.h"
49109864Sjeff
50109864Sjeff#include <sys/param.h>
51109864Sjeff#include <sys/kernel.h>
52109864Sjeff#include <sys/exec.h>
53109864Sjeff#include <sys/file.h>
54121790Sjeff#include <sys/filedesc.h>
55109864Sjeff#include <sys/jail.h>
56113357Sjeff#include <sys/ktr.h>
57113357Sjeff#include <sys/lock.h>
58109864Sjeff#include <sys/mount.h>
59109864Sjeff#include <sys/mutex.h>
60109864Sjeff#include <sys/syscallsubr.h>
61109864Sjeff#include <sys/sysctl.h>
62109864Sjeff#include <sys/proc.h>
63109864Sjeff#include <sys/resourcevar.h>
64109864Sjeff#include <sys/systm.h>
65109864Sjeff#include <sys/signalvar.h>
66113357Sjeff#include <sys/vnode.h>
67113357Sjeff#include <sys/sysent.h>
68113357Sjeff#include <sys/reboot.h>
69113357Sjeff#include <sys/sched.h>
70113357Sjeff#include <sys/sx.h>
71116365Sjeff#include <sys/sysproto.h>
72113357Sjeff#include <sys/vmmeter.h>
73113357Sjeff#include <sys/unistd.h>
74111857Sjeff#include <sys/malloc.h>
75113357Sjeff#include <sys/conf.h>
76111857Sjeff#include <sys/cpuset.h>
77116069Sjeff
78123487Sjeff#include <machine/cpu.h>
79116069Sjeff
80123487Sjeff#include <security/audit/audit.h>
81116069Sjeff#include <security/mac/mac_framework.h>
82116069Sjeff
83109864Sjeff#include <vm/vm.h>
84109864Sjeff#include <vm/vm_param.h>
85109864Sjeff#include <vm/pmap.h>
86109864Sjeff#include <vm/vm_map.h>
87109864Sjeff#include <sys/copyright.h>
88109864Sjeff
89109864Sjeff#include <ddb/ddb.h>
90109864Sjeff#include <ddb/db_sym.h>
91109864Sjeff
92109864Sjeffvoid mi_startup(void);				/* Should be elsewhere */
93109864Sjeff
94109864Sjeff/* Components of the first process -- never freed. */
95113357Sjeffstatic struct session session0;
96110260Sjeffstatic struct pgrp pgrp0;
97109864Sjeffstruct	proc proc0;
98109864Sjeffstruct	thread thread0 __aligned(16);
99109864Sjeffstruct	vmspace vmspace0;
100109864Sjeffstruct	proc *initproc;
101109864Sjeff
102109864Sjeffint	boothowto = 0;		/* initialized so that it can be patched */
103110260SjeffSYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
104121790Sjeffint	bootverbose;
105109864SjeffSYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
106121790Sjeff
107122158Sjeff/*
108121790Sjeff * This ensures that there is at least one entry so that the sysinit_set
109109864Sjeff * symbol is not undefined.  A sybsystem ID of SI_SUB_DUMMY is never
110110645Sjeff * executed.
111110645Sjeff */
112109864SjeffSYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL);
113109864Sjeff
114110645Sjeff/*
115109864Sjeff * The sysinit table itself.  Items are checked off as the are run.
116109864Sjeff * If we want to register new sysinit types, add them to newsysinit.
117109864Sjeff */
118109864SjeffSET_DECLARE(sysinit_set, struct sysinit);
119109864Sjeffstruct sysinit **sysinit, **sysinit_end;
120109864Sjeffstruct sysinit **newsysinit, **newsysinit_end;
121110267Sjeff
122109864Sjeff/*
123109864Sjeff * Merge a new sysinit set into the current set, reallocating it if
124109864Sjeff * necessary.  This can only be called after malloc is running.
125109864Sjeff */
126109864Sjeffvoid
127109864Sjeffsysinit_add(struct sysinit **set, struct sysinit **set_end)
128109864Sjeff{
129109864Sjeff	struct sysinit **newset;
130109864Sjeff	struct sysinit **sipp;
131116642Sjeff	struct sysinit **xipp;
132116642Sjeff	int count;
133116642Sjeff
134116642Sjeff	count = set_end - set;
135111857Sjeff	if (newsysinit)
136111857Sjeff		count += newsysinit_end - newsysinit;
137116642Sjeff	else
138111857Sjeff		count += sysinit_end - sysinit;
139109864Sjeff	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
140111857Sjeff	if (newset == NULL)
141121869Sjeff		panic("cannot malloc for sysinit");
142121869Sjeff	xipp = newset;
143116642Sjeff	if (newsysinit)
144113357Sjeff		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
145116642Sjeff			*xipp++ = *sipp;
146109864Sjeff	else
147109864Sjeff		for (sipp = sysinit; sipp < sysinit_end; sipp++)
148111857Sjeff			*xipp++ = *sipp;
149109864Sjeff	for (sipp = set; sipp < set_end; sipp++)
150110645Sjeff		*xipp++ = *sipp;
151110645Sjeff	if (newsysinit)
152121868Sjeff		free(newsysinit, M_TEMP);
153116365Sjeff	newsysinit = newset;
154111857Sjeff	newsysinit_end = newset + count;
155109864Sjeff}
156121126Sjeff
157121868Sjeff/*
158116365Sjeff * System startup; initialize the world, create process 0, mount root
159116365Sjeff * filesystem, and fork to create init and pagedaemon.  Most of the
160121126Sjeff * hard work is done in the lower-level initialization routines including
161111857Sjeff * startup(), which does memory initialization and autoconfiguration.
162109864Sjeff *
163109864Sjeff * This allows simple addition of new kernel subsystems that require
164109864Sjeff * boot time initialization.  It also allows substitution of subsystem
165109864Sjeff * (for instance, a scheduler, kernel profiler, or VM system) by object
166109864Sjeff * module.  Finally, it allows for optional "kernel threads".
167109864Sjeff */
168109864Sjeffvoid
169112966Sjeffmi_startup(void)
170112966Sjeff{
171121871Sjeff
172109864Sjeff	register struct sysinit **sipp;		/* system initialization*/
173113357Sjeff	register struct sysinit **xipp;		/* interior loop of sort*/
174113357Sjeff	register struct sysinit *save;		/* bubble*/
175125299Sjeff
176121871Sjeff#if defined(VERBOSE_SYSINIT)
177111857Sjeff	int last;
178109864Sjeff	int verbose;
179112966Sjeff#endif
180121871Sjeff
181109864Sjeff	if (sysinit == NULL) {
182109864Sjeff		sysinit = SET_BEGIN(sysinit_set);
183109864Sjeff		sysinit_end = SET_LIMIT(sysinit_set);
184109864Sjeff	}
185109864Sjeff
186113357Sjeffrestart:
187113357Sjeff	/*
188113417Sjeff	 * Perform a bubble sort of the system initialization objects by
189127278Sobrien	 * their subsystem (primary key) and order (secondary key).
190121107Sjeff	 */
191109864Sjeff	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
192109864Sjeff		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
193109864Sjeff			if ((*sipp)->subsystem < (*xipp)->subsystem ||
194109864Sjeff			     ((*sipp)->subsystem == (*xipp)->subsystem &&
195109864Sjeff			      (*sipp)->order <= (*xipp)->order))
196109864Sjeff				continue;	/* skip*/
197109864Sjeff			save = *sipp;
198109864Sjeff			*sipp = *xipp;
199112971Sjeff			*xipp = save;
200109864Sjeff		}
201109864Sjeff	}
202109864Sjeff
203113357Sjeff#if defined(VERBOSE_SYSINIT)
204109864Sjeff	last = SI_SUB_COPYRIGHT;
205109864Sjeff	verbose = 0;
206113357Sjeff#if !defined(DDB)
207113357Sjeff	printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n");
208113357Sjeff#endif
209113357Sjeff#endif
210121896Sjeff
211113357Sjeff	/*
212121869Sjeff	 * Traverse the (now) ordered list of system initialization tasks.
213113357Sjeff	 * Perform each task, and continue on to the next task.
214110267Sjeff	 *
215123433Sjeff	 * The last item on the list is expected to be the scheduler,
216123433Sjeff	 * which will not return.
217123433Sjeff	 */
218123433Sjeff	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
219125289Sjeff
220125289Sjeff		if ((*sipp)->subsystem == SI_SUB_DUMMY)
221110267Sjeff			continue;	/* skip dummy task(s)*/
222109864Sjeff
223109864Sjeff		if ((*sipp)->subsystem == SI_SUB_DONE)
224123433Sjeff			continue;
225109864Sjeff
226123433Sjeff#if defined(VERBOSE_SYSINIT)
227123433Sjeff		if ((*sipp)->subsystem > last) {
228123433Sjeff			verbose = 1;
229123433Sjeff			last = (*sipp)->subsystem;
230123433Sjeff			printf("subsystem %x\n", last);
231123433Sjeff		}
232123433Sjeff		if (verbose) {
233123433Sjeff#if defined(DDB)
234123433Sjeff			const char *name;
235127498Smarcel			c_db_sym_t sym;
236127498Smarcel			db_expr_t  offset;
237127498Smarcel
238123487Sjeff			sym = db_search_symbol((vm_offset_t)(*sipp)->func,
239123433Sjeff			    DB_STGY_PROC, &offset);
240123433Sjeff			db_symbol_values(sym, &name, NULL);
241123433Sjeff			if (name != NULL)
242123433Sjeff				printf("   %s(%p)... ", name, (*sipp)->udata);
243123433Sjeff			else
244123433Sjeff#endif
245109864Sjeff				printf("   %p(%p)... ", (*sipp)->func,
246109864Sjeff				    (*sipp)->udata);
247110028Sjeff		}
248127498Smarcel#endif
249123487Sjeff
250121790Sjeff		/* Call function */
251123433Sjeff		(*((*sipp)->func))((*sipp)->udata);
252123433Sjeff
253123433Sjeff#if defined(VERBOSE_SYSINIT)
254123487Sjeff		if (verbose)
255123487Sjeff			printf("done.\n");
256123433Sjeff#endif
257121790Sjeff
258110028Sjeff		/* Check off the one we're just done */
259110028Sjeff		(*sipp)->subsystem = SI_SUB_DONE;
260110028Sjeff
261109864Sjeff		/* Check if we've installed more sysinit items via KLD */
262112966Sjeff		if (newsysinit != NULL) {
263113357Sjeff			if (sysinit != SET_BEGIN(sysinit_set))
264111857Sjeff				free(sysinit, M_TEMP);
265116463Sjeff			sysinit = newsysinit;
266121868Sjeff			sysinit_end = newsysinit_end;
267121790Sjeff			newsysinit = NULL;
268109864Sjeff			newsysinit_end = NULL;
269110267Sjeff			goto restart;
270121790Sjeff		}
271110028Sjeff	}
272122744Sjeff
273122744Sjeff	panic("Shouldn't get here!");
274122744Sjeff	/* NOTREACHED*/
275122744Sjeff}
276113357Sjeff
277113357Sjeff
278113660Sjeff/*
279110267Sjeff ***************************************************************************
280123433Sjeff ****
281121790Sjeff **** The following SYSINIT's belong elsewhere, but have not yet
282122744Sjeff **** been moved.
283123487Sjeff ****
284123487Sjeff ***************************************************************************
285121790Sjeff */
/*
 * SYSINIT helper: print the NUL-terminated string that `data' points to,
 * verbatim, with no newline appended.
 */
static void
print_caddr_t(void *data)
{
	char *text;

	text = (char *)data;
	printf("%s", text);
}
291123693Sjeff
292123693Sjeffstatic void
293123693Sjeffprint_version(void *data __unused)
294123693Sjeff{
295123693Sjeff	int len;
296122038Sjeff
297123693Sjeff	/* Strip a trailing newline from version. */
298123693Sjeff	len = strlen(version);
299123693Sjeff	while (len > 0 && version[len - 1] == '\n')
300122158Sjeff		len--;
301122165Sjeff	printf("%.*s %s\n", len, version, machine);
302123693Sjeff}
303121790Sjeff
/*
 * Boot banner, printed at SI_SUB_COPYRIGHT in this order: copyright,
 * trademark, version.
 */
SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST,
    print_caddr_t, copyright);
SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND,
    print_caddr_t, trademark);
SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD,
    print_version, NULL);

/*
 * Performance warnings for debugging kernels.  Each is registered twice:
 * once right after the banner and once more at SI_SUB_RUN_SCHEDULER.
 */
#if defined(WITNESS)
static char wit_warn[] =
    "WARNING: WITNESS option enabled, expect reduced performance.\n";
SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
    print_caddr_t, wit_warn);
SYSINIT(witwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 1,
    print_caddr_t, wit_warn);
#endif

#if defined(DIAGNOSTIC)
static char diag_warn[] =
    "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2,
    print_caddr_t, diag_warn);
SYSINIT(diagwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 2,
    print_caddr_t, diag_warn);
#endif
327122744Sjeff
328122744Sjeffstatic void
329122744Sjeffset_boot_verbose(void *data __unused)
330122744Sjeff{
331123433Sjeff
332123433Sjeff	if (boothowto & RB_VERBOSE)
333123433Sjeff		bootverbose++;
334123433Sjeff}
335122744SjeffSYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL);
336122744Sjeff
337122744Sjeffstruct sysentvec null_sysvec = {
338122744Sjeff	.sv_size	= 0,
339122744Sjeff	.sv_table	= NULL,
340122744Sjeff	.sv_mask	= 0,
341122744Sjeff	.sv_sigsize	= 0,
342122744Sjeff	.sv_sigtbl	= NULL,
343123433Sjeff	.sv_errsize	= 0,
344123433Sjeff	.sv_errtbl	= NULL,
345123433Sjeff	.sv_transtrap	= NULL,
346123433Sjeff	.sv_fixup	= NULL,
347122744Sjeff	.sv_sendsig	= NULL,
348122744Sjeff	.sv_sigcode	= NULL,
349122744Sjeff	.sv_szsigcode	= NULL,
350122744Sjeff	.sv_prepsyscall	= NULL,
351113357Sjeff	.sv_name	= "null",
352122744Sjeff	.sv_coredump	= NULL,
353113357Sjeff	.sv_imgact_try	= NULL,
354121896Sjeff	.sv_minsigstksz	= 0,
355115998Sjeff	.sv_pagesize	= PAGE_SIZE,
356121896Sjeff	.sv_minuser	= VM_MIN_ADDRESS,
357121896Sjeff	.sv_maxuser	= VM_MAXUSER_ADDRESS,
358121896Sjeff	.sv_usrstack	= USRSTACK,
359113357Sjeff	.sv_psstrings	= PS_STRINGS,
360125289Sjeff	.sv_stackprot	= VM_PROT_ALL,
361123487Sjeff	.sv_copyout_strings	= NULL,
362123487Sjeff	.sv_setregs	= NULL,
363125289Sjeff	.sv_fixlimit	= NULL,
364125289Sjeff	.sv_maxssiz	= NULL
365123487Sjeff};
366113357Sjeff
367122744Sjeff/*
368122744Sjeff ***************************************************************************
369122744Sjeff ****
370122744Sjeff **** The two following SYSINIT's are proc0 specific glue code.  I am not
371113357Sjeff **** convinced that they can not be safely combined, but their order of
372113357Sjeff **** operation has been maintained as the same as the original init_main.c
373110267Sjeff **** for right now.
374113357Sjeff ****
375112994Sjeff **** These probably belong in init_proc.c or kern_proc.c, since they
376122744Sjeff **** deal with proc0 (the fork template process).
377110267Sjeff ****
378121896Sjeff ***************************************************************************
379115998Sjeff */
380121896Sjeff/* ARGSUSED*/
381121896Sjeffstatic void
382121896Sjeffproc0_init(void *dummy __unused)
383125289Sjeff{
384123487Sjeff	struct proc *p;
385123487Sjeff	unsigned i;
386125289Sjeff	struct thread *td;
387125289Sjeff
388123487Sjeff	GIANT_REQUIRED;
389113357Sjeff	p = &proc0;
390113357Sjeff	td = &thread0;
391113357Sjeff
392113357Sjeff	/*
393110267Sjeff	 * Initialize magic number and osrel.
394110267Sjeff	 */
395113357Sjeff	p->p_magic = P_MAGIC;
396113357Sjeff	p->p_osrel = osreldate;
397110267Sjeff
398115998Sjeff	/*
399113357Sjeff	 * Initialize thread and process structures.
400113357Sjeff	 */
401121896Sjeff	procinit();	/* set up proc zone */
402113357Sjeff	threadinit();	/* set up UMA zones */
403110267Sjeff
404110267Sjeff	/*
405113357Sjeff	 * Initialise scheduler resources.
406113357Sjeff	 * Add scheduler specific parts to proc, thread as needed.
407110267Sjeff	 */
408113357Sjeff	schedinit();	/* scheduler gets its house in order */
409113357Sjeff	/*
410115998Sjeff	 * Initialize sleep queue hash table
411113357Sjeff	 */
412113357Sjeff	sleepinit();
413113357Sjeff
414113357Sjeff	/*
415113357Sjeff	 * additional VM structures
416113357Sjeff	 */
417113357Sjeff	vm_init2();
418113357Sjeff
419113357Sjeff	/*
420113357Sjeff	 * Create process 0 (the swapper).
421113357Sjeff	 */
422113357Sjeff	LIST_INSERT_HEAD(&allproc, p, p_list);
423121896Sjeff	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
424113357Sjeff	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
425113357Sjeff	p->p_pgrp = &pgrp0;
426121869Sjeff	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
427113357Sjeff	LIST_INIT(&pgrp0.pg_members);
428113357Sjeff	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
429113357Sjeff
430113357Sjeff	pgrp0.pg_session = &session0;
431110267Sjeff	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
432110267Sjeff	refcount_init(&session0.s_count, 1);
433113357Sjeff	session0.s_leader = p;
434116069Sjeff
435122744Sjeff	p->p_sysent = &null_sysvec;
436116069Sjeff	p->p_flag = P_SYSTEM | P_INMEM;
437116069Sjeff	p->p_state = PRS_NORMAL;
438116069Sjeff	knlist_init_mtx(&p->p_klist, &p->p_mtx);
439116069Sjeff	STAILQ_INIT(&p->p_ktr);
440116069Sjeff	p->p_nice = NZERO;
441116069Sjeff	td->td_tid = PID_MAX + 1;
442116069Sjeff	td->td_state = TDS_RUNNING;
443116069Sjeff	td->td_pri_class = PRI_TIMESHARE;
444116069Sjeff	td->td_user_pri = PUSER;
445116069Sjeff	td->td_base_user_pri = PUSER;
446116069Sjeff	td->td_priority = PVM;
447116069Sjeff	td->td_base_pri = PUSER;
448116069Sjeff	td->td_oncpu = 0;
449116069Sjeff	td->td_flags = TDF_INMEM|TDP_KTHREAD;
450121790Sjeff	td->td_cpuset = cpuset_thread0();
451122744Sjeff	prison0.pr_cpuset = cpuset_ref(td->td_cpuset);
452116069Sjeff	p->p_peers = 0;
453123487Sjeff	p->p_leader = p;
454123487Sjeff
455123487Sjeff
456123487Sjeff	strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
457123487Sjeff	strncpy(td->td_name, "swapper", sizeof (td->td_name));
458123487Sjeff
459123487Sjeff	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
460123487Sjeff	callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
461123487Sjeff	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
462123487Sjeff
463123487Sjeff	/* Create credentials. */
464123487Sjeff	p->p_ucred = crget();
465123487Sjeff	p->p_ucred->cr_ngroups = 1;	/* group 0 */
466123487Sjeff	p->p_ucred->cr_uidinfo = uifind(0);
467123487Sjeff	p->p_ucred->cr_ruidinfo = uifind(0);
468123487Sjeff	p->p_ucred->cr_prison = &prison0;
469123487Sjeff#ifdef AUDIT
470123487Sjeff	audit_cred_kproc0(p->p_ucred);
471123487Sjeff#endif
472123487Sjeff#ifdef MAC
473123487Sjeff	mac_cred_create_swapper(p->p_ucred);
474123487Sjeff#endif
475123487Sjeff	td->td_ucred = crhold(p->p_ucred);
476123487Sjeff
477123487Sjeff	/* Create sigacts. */
478123487Sjeff	p->p_sigacts = sigacts_alloc();
479123487Sjeff
480123487Sjeff	/* Initialize signal state for process 0. */
481123487Sjeff	siginit(&proc0);
482123487Sjeff
483123487Sjeff	/* Create the file descriptor table. */
484123487Sjeff	p->p_fd = fdinit(NULL);
485123487Sjeff	p->p_fdtol = NULL;
486123487Sjeff
487123487Sjeff	/* Create the limits structures. */
488123487Sjeff	p->p_limit = lim_alloc();
489123487Sjeff	for (i = 0; i < RLIM_NLIMITS; i++)
490123487Sjeff		p->p_limit->pl_rlimit[i].rlim_cur =
491123487Sjeff		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
492123487Sjeff	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
493123487Sjeff	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
494123487Sjeff	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
495123487Sjeff	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
496123487Sjeff	i = ptoa(cnt.v_free_count);
497123487Sjeff	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
498123487Sjeff	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
499123487Sjeff	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
500123487Sjeff	p->p_cpulimit = RLIM_INFINITY;
501123487Sjeff
502123487Sjeff	p->p_stats = pstats_alloc();
503123487Sjeff
504123487Sjeff	/* Allocate a prototype map so we have something to fork. */
505123487Sjeff	pmap_pinit0(vmspace_pmap(&vmspace0));
506116069Sjeff	p->p_vmspace = &vmspace0;
507123487Sjeff	vmspace0.vm_refcnt = 1;
508123487Sjeff
509123487Sjeff	/*
510123487Sjeff	 * proc0 is not expected to enter usermode, so there is no special
511123487Sjeff	 * handling for sv_minuser here, like is done for exec_new_vmspace().
512123487Sjeff	 */
513123487Sjeff	vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0),
514123487Sjeff	    p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser);
515123487Sjeff
516123487Sjeff	/*-
517123487Sjeff	 * call the init and ctor for the new thread and proc
518123487Sjeff	 * we wait to do this until all other structures
519123487Sjeff	 * are fairly sane.
520123487Sjeff	 */
521123487Sjeff	EVENTHANDLER_INVOKE(process_init, p);
522123487Sjeff	EVENTHANDLER_INVOKE(thread_init, td);
523123487Sjeff	EVENTHANDLER_INVOKE(process_ctor, p);
524123487Sjeff	EVENTHANDLER_INVOKE(thread_ctor, td);
525123487Sjeff
526123487Sjeff	/*
527123487Sjeff	 * Charge root for one process.
528123487Sjeff	 */
529123487Sjeff	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
530123487Sjeff}
531123433SjeffSYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
532116069Sjeff
533116069Sjeff/* ARGSUSED*/
534116069Sjeffstatic void
535116069Sjeffproc0_post(void *dummy __unused)
536116069Sjeff{
537116069Sjeff	struct timespec ts;
538116069Sjeff	struct proc *p;
539123433Sjeff	struct rusage ru;
540123433Sjeff	struct thread *td;
541123433Sjeff
542123433Sjeff	/*
543123487Sjeff	 * Now we can look at the time, having had a chance to verify the
544123487Sjeff	 * time from the filesystem.  Pretend that proc0 started now.
545123487Sjeff	 */
546123487Sjeff	sx_slock(&allproc_lock);
547123487Sjeff	FOREACH_PROC_IN_SYSTEM(p) {
548123487Sjeff		microuptime(&p->p_stats->p_start);
549123487Sjeff		PROC_SLOCK(p);
550123487Sjeff		rufetch(p, &ru);	/* Clears thread stats */
551123487Sjeff		PROC_SUNLOCK(p);
552123487Sjeff		p->p_rux.rux_runtime = 0;
553123487Sjeff		p->p_rux.rux_uticks = 0;
554123487Sjeff		p->p_rux.rux_sticks = 0;
555123487Sjeff		p->p_rux.rux_iticks = 0;
556123487Sjeff		FOREACH_THREAD_IN_PROC(p, td) {
557123487Sjeff			td->td_runtime = 0;
558123487Sjeff		}
559123487Sjeff	}
560123487Sjeff	sx_sunlock(&allproc_lock);
561123487Sjeff	PCPU_SET(switchtime, cpu_ticks());
562123433Sjeff	PCPU_SET(switchticks, ticks);
563123487Sjeff
564123433Sjeff	/*
565122744Sjeff	 * Give the ``random'' number generator a thump.
566123433Sjeff	 */
567122744Sjeff	nanotime(&ts);
568123487Sjeff	srandom(ts.tv_sec ^ ts.tv_nsec);
569116069Sjeff}
570116069SjeffSYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL);
571116069Sjeff
572123433Sjeffstatic void
573116069Sjeffrandom_init(void *dummy __unused)
574123487Sjeff{
575116069Sjeff
576116069Sjeff	/*
577116069Sjeff	 * After CPU has been started we have some randomness on most
578121790Sjeff	 * platforms via get_cyclecount().  For platforms that don't
579116069Sjeff	 * we will reseed random(9) in proc0_post() as well.
580116069Sjeff	 */
581123433Sjeff	srandom(get_cyclecount());
582123433Sjeff}
583116069SjeffSYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL);
584116069Sjeff
585123433Sjeff/*
586123433Sjeff ***************************************************************************
587123433Sjeff ****
588123433Sjeff **** The following SYSINIT's and glue code should be moved to the
589123433Sjeff **** respective files on a per subsystem basis.
590123433Sjeff ****
591123433Sjeff ***************************************************************************
592123433Sjeff */
593123433Sjeff
594123433Sjeff
595123433Sjeff/*
596123433Sjeff ***************************************************************************
597123433Sjeff ****
598123433Sjeff **** The following code probably belongs in another file, like
599123433Sjeff **** kern/init_init.c.
600123433Sjeff ****
601123433Sjeff ***************************************************************************
602123433Sjeff */
603123433Sjeff
604123433Sjeff/*
605116069Sjeff * List of paths to try when searching for "init".
606123433Sjeff */
607123433Sjeffstatic char init_path[MAXPATHLEN] =
608121923Sjeff#ifdef	INIT_PATH
609116069Sjeff    __XSTRING(INIT_PATH);
610110267Sjeff#else
611123433Sjeff    "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init:/stand/sysinstall";
612123433Sjeff#endif
613121790SjeffSYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
614123433Sjeff	"Path used to search the init process");
615123433Sjeff
616123433Sjeff/*
617123433Sjeff * Shutdown timeout of init(8).
618123433Sjeff * Unused within kernel, but used to control init(8), hence do not remove.
619123433Sjeff */
620123433Sjeff#ifndef INIT_SHUTDOWN_TIMEOUT
621123433Sjeff#define INIT_SHUTDOWN_TIMEOUT 120
622123433Sjeff#endif
623123433Sjeffstatic int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
624123433SjeffSYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
625123433Sjeff	CTLFLAG_RW, &init_shutdown_timeout, 0, "");
626123433Sjeff
627123433Sjeff/*
628123433Sjeff * Start the initial user process; try exec'ing each pathname in init_path.
629123433Sjeff * The program is invoked with one argument containing the boot flags.
630123433Sjeff */
631123433Sjeffstatic void
632123433Sjeffstart_init(void *dummy)
633123433Sjeff{
634123433Sjeff	vm_offset_t addr;
635123433Sjeff	struct execve_args args;
636123433Sjeff	int options, error;
637123433Sjeff	char *var, *path, *next, *s;
638123433Sjeff	char *ucp, **uap, *arg0, *arg1;
639123433Sjeff	struct thread *td;
640123433Sjeff	struct proc *p;
641123433Sjeff
642123433Sjeff	mtx_lock(&Giant);
643123433Sjeff
644123433Sjeff	GIANT_REQUIRED;
645123433Sjeff
646123433Sjeff	td = curthread;
647123433Sjeff	p = td->td_proc;
648121790Sjeff
649121790Sjeff	vfs_mountroot();
650121790Sjeff
651121790Sjeff	/*
652121790Sjeff	 * Need just enough stack to hold the faked-up "execve()" arguments.
653121790Sjeff	 */
654121790Sjeff	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
655121790Sjeff	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
656121790Sjeff			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
657122848Sjeff		panic("init: couldn't allocate argument space");
658121790Sjeff	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
659121790Sjeff	p->p_vmspace->vm_ssize = 1;
660121790Sjeff
661121790Sjeff	if ((var = getenv("init_path")) != NULL) {
662121790Sjeff		strlcpy(init_path, var, sizeof(init_path));
663121790Sjeff		freeenv(var);
664121790Sjeff	}
665121790Sjeff
666121790Sjeff	for (path = init_path; *path != '\0'; path = next) {
667121790Sjeff		while (*path == ':')
668121790Sjeff			path++;
669121790Sjeff		if (*path == '\0')
670121790Sjeff			break;
671121790Sjeff		for (next = path; *next != '\0' && *next != ':'; next++)
672121790Sjeff			/* nothing */ ;
673123529Sjeff		if (bootverbose)
674121790Sjeff			printf("start_init: trying %.*s\n", (int)(next - path),
675121790Sjeff			    path);
676121790Sjeff
677121790Sjeff		/*
678121790Sjeff		 * Move out the boot flag argument.
679121790Sjeff		 */
680121790Sjeff		options = 0;
681121790Sjeff		ucp = (char *)p->p_sysent->sv_usrstack;
682122848Sjeff		(void)subyte(--ucp, 0);		/* trailing zero */
683121790Sjeff		if (boothowto & RB_SINGLE) {
684121790Sjeff			(void)subyte(--ucp, 's');
685121790Sjeff			options = 1;
686121790Sjeff		}
687121790Sjeff#ifdef notyet
688121790Sjeff                if (boothowto & RB_FASTBOOT) {
689121790Sjeff			(void)subyte(--ucp, 'f');
690121790Sjeff			options = 1;
691121790Sjeff		}
692121790Sjeff#endif
693121790Sjeff
694121790Sjeff#ifdef BOOTCDROM
695121790Sjeff		(void)subyte(--ucp, 'C');
696121790Sjeff		options = 1;
697121790Sjeff#endif
698121790Sjeff
699121790Sjeff		if (options == 0)
700121790Sjeff			(void)subyte(--ucp, '-');
701121790Sjeff		(void)subyte(--ucp, '-');		/* leading hyphen */
702121790Sjeff		arg1 = ucp;
703121790Sjeff
704121790Sjeff		/*
705121790Sjeff		 * Move out the file name (also arg 0).
706121790Sjeff		 */
707121790Sjeff		(void)subyte(--ucp, 0);
708123231Speter		for (s = next - 1; s >= path; s--)
709121790Sjeff			(void)subyte(--ucp, *s);
710121790Sjeff		arg0 = ucp;
711121790Sjeff
712121896Sjeff		/*
713121896Sjeff		 * Move out the arg pointers.
714121790Sjeff		 */
715121790Sjeff		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
716121790Sjeff		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
717121790Sjeff		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
718121790Sjeff		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
719121790Sjeff
720121790Sjeff		/*
721121790Sjeff		 * Point at the arguments.
722123433Sjeff		 */
723121790Sjeff		args.fname = arg0;
724121790Sjeff		args.argv = uap;
725121790Sjeff		args.envv = NULL;
726123433Sjeff
727123433Sjeff		/*
728123433Sjeff		 * Now try to exec the program.  If can't for any reason
729123433Sjeff		 * other than it doesn't exist, complain.
730123433Sjeff		 *
731123433Sjeff		 * Otherwise, return via fork_trampoline() all the way
732121790Sjeff		 * to user mode as init!
733121790Sjeff		 */
734123433Sjeff		if ((error = execve(td, &args)) == 0) {
735123433Sjeff			mtx_unlock(&Giant);
736123433Sjeff			return;
737121790Sjeff		}
738123433Sjeff		if (error != ENOENT)
739123433Sjeff			printf("exec %.*s: error %d\n", (int)(next - path),
740123433Sjeff			    path, error);
741123433Sjeff	}
742123433Sjeff	printf("init: not found in path %s\n", init_path);
743123433Sjeff	panic("no init");
744123433Sjeff}
745123685Sjeff
746123685Sjeff/*
747123433Sjeff * Like kproc_create(), but runs in it's own address space.
748123433Sjeff * We do this early to reserve pid 1.
749123433Sjeff *
750123433Sjeff * Note special case - do not make it runnable yet.  Other work
751123685Sjeff * in progress will change this more.
752123685Sjeff */
753123685Sjeffstatic void
754123685Sjeffcreate_init(const void *udata __unused)
755123685Sjeff{
756123685Sjeff	struct ucred *newcred, *oldcred;
757123694Sjeff	int error;
758123433Sjeff
759123433Sjeff	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
760123433Sjeff	if (error)
761123433Sjeff		panic("cannot fork init: %d\n", error);
762123433Sjeff	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
763123433Sjeff	/* divorce init's credentials from the kernel's */
764123433Sjeff	newcred = crget();
765123433Sjeff	PROC_LOCK(initproc);
766123433Sjeff	initproc->p_flag |= P_SYSTEM | P_INMEM;
767123433Sjeff	oldcred = initproc->p_ucred;
768123433Sjeff	crcopy(newcred, oldcred);
769123433Sjeff#ifdef MAC
770123433Sjeff	mac_cred_create_init(newcred);
771123433Sjeff#endif
772123433Sjeff#ifdef AUDIT
773123433Sjeff	audit_cred_proc1(newcred);
774123433Sjeff#endif
775123433Sjeff	initproc->p_ucred = newcred;
776123433Sjeff	PROC_UNLOCK(initproc);
777123433Sjeff	crfree(oldcred);
778123433Sjeff	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
779123433Sjeff	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
780123433Sjeff}
781123433SjeffSYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
782123433Sjeff
783123433Sjeff/*
784123433Sjeff * Make it runnable now.
785123433Sjeff */
786123433Sjeffstatic void
787121790Sjeffkick_init(const void *udata __unused)
788121790Sjeff{
789117326Sjeff	struct thread *td;
790121790Sjeff
791117326Sjeff	td = FIRST_THREAD_IN_PROC(initproc);
792117326Sjeff	thread_lock(td);
793121790Sjeff	TD_SET_CAN_RUN(td);
794121790Sjeff	sched_add(td, SRQ_BORING);
795110267Sjeff	thread_unlock(td);
796110267Sjeff}
797110267SjeffSYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL);
798110267Sjeff