init_main.c revision 1.351
1/*	$NetBSD: init_main.c,v 1.351 2008/04/24 08:51:06 sborrill Exp $	*/
2
3/*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the NetBSD
18 *	Foundation, Inc. and its contributors.
19 * 4. Neither the name of The NetBSD Foundation nor the names of its
20 *    contributors may be used to endorse or promote products derived
21 *    from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/*
37 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
38 *	The Regents of the University of California.  All rights reserved.
39 * (c) UNIX System Laboratories, Inc.
40 * All or some portions of this file are derived from material licensed
41 * to the University of California by American Telephone and Telegraph
42 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
43 * the permission of UNIX System Laboratories, Inc.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 *    notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 *    notice, this list of conditions and the following disclaimer in the
52 *    documentation and/or other materials provided with the distribution.
53 * 3. Neither the name of the University nor the names of its contributors
54 *    may be used to endorse or promote products derived from this software
55 *    without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 *
69 *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
70 */
71
72/*
73 * Copyright (c) 1995 Christopher G. Demetriou.  All rights reserved.
74 *
75 * Redistribution and use in source and binary forms, with or without
76 * modification, are permitted provided that the following conditions
77 * are met:
78 * 1. Redistributions of source code must retain the above copyright
79 *    notice, this list of conditions and the following disclaimer.
80 * 2. Redistributions in binary form must reproduce the above copyright
81 *    notice, this list of conditions and the following disclaimer in the
82 *    documentation and/or other materials provided with the distribution.
83 * 3. All advertising materials mentioning features or use of this software
84 *    must display the following acknowledgement:
85 *	This product includes software developed by the University of
86 *	California, Berkeley and its contributors.
87 * 4. Neither the name of the University nor the names of its contributors
88 *    may be used to endorse or promote products derived from this software
89 *    without specific prior written permission.
90 *
91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
94 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
101 * SUCH DAMAGE.
102 *
103 *	@(#)init_main.c	8.16 (Berkeley) 5/14/95
104 */
105
106#include <sys/cdefs.h>
107__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.351 2008/04/24 08:51:06 sborrill Exp $");
108
109#include "opt_ipsec.h"
110#include "opt_ntp.h"
111#include "opt_pipe.h"
112#include "opt_posix.h"
113#include "opt_syscall_debug.h"
114#include "opt_sysv.h"
115#include "opt_fileassoc.h"
116#include "opt_ktrace.h"
117#include "opt_pax.h"
118
119#include "rnd.h"
120#include "sysmon_envsys.h"
121#include "sysmon_power.h"
122#include "sysmon_taskq.h"
123#include "sysmon_wdog.h"
124#include "veriexec.h"
125
126#include <sys/param.h>
127#include <sys/acct.h>
128#include <sys/filedesc.h>
129#include <sys/file.h>
130#include <sys/errno.h>
131#include <sys/callout.h>
132#include <sys/cpu.h>
133#include <sys/kernel.h>
134#include <sys/kmem.h>
135#include <sys/mount.h>
136#include <sys/proc.h>
137#include <sys/kthread.h>
138#include <sys/resourcevar.h>
139#include <sys/signalvar.h>
140#include <sys/systm.h>
141#include <sys/vnode.h>
142#include <sys/fstrans.h>
143#include <sys/tty.h>
144#include <sys/conf.h>
145#include <sys/disklabel.h>
146#include <sys/buf.h>
147#include <sys/device.h>
148#include <sys/exec.h>
149#include <sys/socketvar.h>
150#include <sys/protosw.h>
151#include <sys/percpu.h>
152#include <sys/pset.h>
153#include <sys/sysctl.h>
154#include <sys/reboot.h>
155#include <sys/user.h>
156#include <sys/sysctl.h>
157#include <sys/event.h>
158#include <sys/mbuf.h>
159#include <sys/sched.h>
160#include <sys/sleepq.h>
161#include <sys/iostat.h>
162#include <sys/vmem.h>
163#include <sys/uuid.h>
164#include <sys/extent.h>
165#include <sys/disk.h>
166#include <sys/mqueue.h>
167#include <sys/msgbuf.h>
168#include <sys/module.h>
169#include <sys/event.h>
170#ifdef FAST_IPSEC
171#include <netipsec/ipsec.h>
172#endif
173#ifdef SYSVSHM
174#include <sys/shm.h>
175#endif
176#ifdef SYSVSEM
177#include <sys/sem.h>
178#endif
179#ifdef SYSVMSG
180#include <sys/msg.h>
181#endif
182#ifdef P1003_1B_SEMAPHORE
183#include <sys/ksem.h>
184#endif
185#include <sys/domain.h>
186#include <sys/namei.h>
187#if NRND > 0
188#include <sys/rnd.h>
189#endif
190#include <sys/pipe.h>
191#ifdef LKM
192#include <sys/lkm.h>
193#endif
194#if NVERIEXEC > 0
195#include <sys/verified_exec.h>
196#endif /* NVERIEXEC > 0 */
197#ifdef KTRACE
198#include <sys/ktrace.h>
199#endif
200#include <sys/kauth.h>
201#include <net80211/ieee80211_netbsd.h>
202
203#include <sys/syscall.h>
204#include <sys/syscallargs.h>
205
206#if defined(PAX_MPROTECT) || defined(PAX_SEGVGUARD) || defined(PAX_ASLR)
207#include <sys/pax.h>
208#endif /* PAX_MPROTECT || PAX_SEGVGUARD || PAX_ASLR */
209
210#include <ufs/ufs/quota.h>
211
212#include <miscfs/genfs/genfs.h>
213#include <miscfs/syncfs/syncfs.h>
214
215#include <sys/cpu.h>
216
217#include <uvm/uvm.h>
218
219#if NSYSMON_TASKQ > 0
220#include <dev/sysmon/sysmon_taskq.h>
221#endif
222
223#include <dev/cons.h>
224
225#if NSYSMON_ENVSYS > 0 || NSYSMON_POWER > 0 || NSYSMON_WDOG > 0
226#include <dev/sysmon/sysmonvar.h>
227#endif
228
229#include <net/if.h>
230#include <net/raw_cb.h>
231
232#include <secmodel/secmodel.h>
233
/* Objects defined outside this file. */
extern struct proc proc0;
extern struct lwp lwp0;		/* main() runs with l = &lwp0 */
extern struct cwdinfo cwdi0;	/* main() points its cwdi_cdir at the root vnode */
extern time_t rootfstime;	/* handed to inittodr() once root is mounted */

/* Only define curlwp as a variable when no macro of that name exists. */
#ifndef curlwp
struct	lwp *curlwp = &lwp0;
#endif
struct	proc *initproc;		/* process 1; filled in by fork1() in main() */

struct	vnode *rootvp, *swapdev_vp;
int	boothowto;		/* boot flags (RB_*), tested by main() and start_init() */
int	cold = 1;			/* still working on startup */
struct timeval boottime;	        /* time at system startup - will only follow settime deltas */

/* Set to 1 by main(), which then does wakeup() on its address. */
volatile int start_init_exec;		/* semaphore for start_init() */

static void check_console(struct lwp *l);
static void start_init(void *);
void main(void);
void ssp_init(void);
#if defined(__SSP__) || defined(__SSP_ALL__)
/*
 * Stack protector guard words.  They start out as all zeroes and are
 * overwritten with random values by ssp_init().
 */
long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0};
void __stack_chk_fail(void);

/*
 * Stack protector failure handler: panic with a fixed message.
 * Never returns.
 */
void
__stack_chk_fail(void)
{
	panic("stack overflow detected; terminated");
}

/*
 * Fill __stack_chk_guard with random values.
 *
 * The random bytes are first collected into a local array and then
 * copied into the live guard with a plain loop at splhigh(), so that
 * no function call is made while the guard is only partially updated.
 *
 * With DIAGNOSTIC, a start and a completion message are printed.  The
 * guard values themselves are deliberately NOT printed: they are the
 * secret the stack protector depends on, and console output can end
 * up in logs readable by others.
 */
void
ssp_init(void)
{
	long guard[__arraycount(__stack_chk_guard)];
	size_t i;
	int s;

#ifdef DIAGNOSTIC
	printf("Initializing SSP:");
#endif
	/*
	 * We initialize ssp here carefully:
	 *	1. after we got some entropy
	 *	2. without calling a function
	 */
	arc4randbytes(guard, sizeof(guard));
	s = splhigh();
	for (i = 0; i < __arraycount(guard); i++)
		__stack_chk_guard[i] = guard[i];
	splx(s);
#ifdef DIAGNOSTIC
	/* Report completion only; never disclose the guard contents. */
	printf(" done\n");
#endif
}
#else
/*
 * Built without the stack protector: there is no guard to initialize.
 */
void
ssp_init(void)
{

}
#endif
300
/*
 * Fallback for secmodel_start(): the symbol is weakly aliased to this
 * function, which does nothing.
 */
void __secmodel_none(void);
__weak_alias(secmodel_start,__secmodel_none);
void
__secmodel_none(void)
{
	/* Nothing to start. */
}
308
/*
 * System startup; initialize the world, create process 0, mount root
 * filesystem, and fork to create init and pagedaemon.  Most of the
 * hard work is done in the lower-level initialization routines,
 * including cpu_startup(), which does the machine-dependent
 * initialization, and configure(), which configures the hardware.
 *
 * The order of the calls below is significant; several steps are
 * explicitly required to run before or after autoconfiguration.
 * This function does not return: it ends by calling uvm_scheduler().
 */
void
main(void)
{
	struct timeval time;
	struct lwp *l;
	struct proc *p;
	int s, error;
#ifdef NVNODE_IMPLICIT
	int usevnodes;
#endif
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	l = &lwp0;
#ifndef LWP0_CPU_INFO
	l->l_cpu = curcpu();
#endif

	/*
	 * Attempt to find console and initialize
	 * in case of early panic or other messages.
	 */
	consinit();

	kernel_lock_init();

	uvm_init();

	kmem_init();

	percpu_init();

	/* Initialize lock caches. */
	mutex_obj_init();

	/* Initialize the extent manager. */
	extent_init();

	/* Do machine-dependent initialization. */
	cpu_startup();

	/* Start module system. */
	module_init();

	/* Initialize callouts, part 1. */
	callout_startup();

	/*
	 * Initialize the kernel authorization subsystem and start the
	 * default security model, if any. We need to do this early
	 * enough so that subsystems relying on any of the aforementioned
	 * can work properly. Since the security model may dictate the
	 * credential inheritance policy, it is needed at least before
	 * any process is created, specifically proc0.
	 */
	kauth_init();
	secmodel_start();

	/* Initialize the buffer cache */
	bufinit();

	/* Initialize sockets. */
	soinit();

	/*
	 * The following things must be done before autoconfiguration.
	 */
	evcnt_init();		/* initialize event counters */
#if NRND > 0
	rnd_init();		/* initialize RNG */
#endif

	/* Initialize process and pgrp structures. */
	procinit();
	lwpinit();

	/* Initialize signal-related data structures. */
	signal_init();

	/* Initialize resource management. */
	resource_init();

	/* Create process 0 (the swapper). */
	proc0_init();

	/* Initialize the UID hash table. */
	uid_init();

	/* Charge root for one process. */
	(void)chgproccnt(0, 1);

	/* Initialize timekeeping. */
	time_init();

	/* Initialize the run queues, turnstiles and sleep queues. */
	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);
	sched_rqinit();
	turnstile_init();
	sleeptab_init(&sleeptab);

	/* Initialize processor-sets */
	psets_init();

	/* MI initialization of the boot cpu */
	error = mi_cpu_attach(curcpu());
	KASSERT(error == 0);

	/* Initialize timekeeping, part 2. */
	time_init2();

	/*
	 * Initialize mbuf's.  Do this now because we might attempt to
	 * allocate mbufs or mbuf clusters during autoconfiguration.
	 */
	mbinit();

	/* Initialize the sysctl subsystem. */
	sysctl_init();

	/* Initialize I/O statistics. */
	iostat_init();

	/* Initialize the log device. */
	loginit();

	/* Initialize the file systems. */
#ifdef NVNODE_IMPLICIT
	/*
	 * If maximum number of vnodes in namei vnode cache is not explicitly
	 * defined in kernel config, adjust the number such as we use roughly
	 * 1.0% of memory for vnode cache (but not less than NVNODE vnodes).
	 */
	usevnodes =
	    calc_cache_size(kernel_map, 1, VNODE_VA_MAXPCT) / sizeof(vnode_t);
	if (usevnodes > desiredvnodes)
		desiredvnodes = usevnodes;
#endif
	vfsinit();

	/* Initialize fstrans. */
	fstrans_init();

	/* Initialize the file descriptor system. */
	fd_sys_init();

	/* Initialize kqueue. */
	kqueue_init();

	/* Initialize asynchronous I/O. */
	aio_sysinit();

	/* Initialize message queues. */
	mqueue_sysinit();

	/* Initialize the system monitor subsystems. */
#if NSYSMON_TASKQ > 0
	sysmon_task_queue_preinit();
#endif

#if NSYSMON_ENVSYS > 0
	sysmon_envsys_init();
#endif

#if NSYSMON_POWER > 0
	sysmon_power_init();
#endif

#if NSYSMON_WDOG > 0
	sysmon_wdog_init();
#endif

	inittimecounter();
	ntp_init();

	/* Initialize the device switch tables. */
	devsw_init();

	/* Initialize tty subsystem. */
	tty_init();
	ttyldisc_init();

	/* Initialize the buffer cache, part 2. */
	bufinit2();

	/* Initialize the disk wedge subsystem. */
	dkwedge_init();

	/* Configure the system hardware.  This will enable interrupts. */
	configure();

	ubc_init();		/* must be after autoconfig */

#ifdef SYSVSHM
	/* Initialize System V style shared memory. */
	shminit();
#endif

#ifdef SYSVSEM
	/* Initialize System V style semaphores. */
	seminit();
#endif

#ifdef SYSVMSG
	/* Initialize System V style message queues. */
	msginit();
#endif

#ifdef P1003_1B_SEMAPHORE
	/* Initialize posix semaphores */
	ksem_init();
#endif

#if NVERIEXEC > 0
	/*
	 * Initialise the Veriexec subsystem.
	 */
	veriexec_init();
#endif /* NVERIEXEC > 0 */

#if defined(PAX_MPROTECT) || defined(PAX_SEGVGUARD) || defined(PAX_ASLR)
	pax_init();
#endif /* PAX_MPROTECT || PAX_SEGVGUARD || PAX_ASLR */

#ifdef	FAST_IPSEC
	/* Attach network crypto subsystem */
	ipsec_attach();
#endif

	/*
	 * Initialize protocols.  Block reception of incoming packets
	 * until everything is ready.
	 */
	s = splnet();
	ifinit();
	domaininit();
	if_attachdomain();
	splx(s);

#ifdef GPROF
	/* Initialize kernel profiling. */
	kmstartup();
#endif

	/* Initialize system accounting. */
	acct_init();

#ifndef PIPE_SOCKETPAIR
	/* Initialize pipes. */
	pipe_init();
#endif

#ifdef KTRACE
	/* Initialize ktrace. */
	ktrinit();
#endif

	/* Initialize the UUID system calls. */
	uuid_init();

	/*
	 * Create process 1 (init(8)).  We do this now, as Unix has
	 * historically had init be process 1, and changing this would
	 * probably upset a lot of people.
	 *
	 * Note that process 1 won't immediately exec init(8), but will
	 * wait for us to inform it that the root file system has been
	 * mounted.
	 */
	if (fork1(l, 0, SIGCHLD, NULL, 0, start_init, NULL, NULL, &initproc))
		panic("fork init");

	/*
	 * Load any remaining builtin modules, and hand back temporary
	 * storage to the VM system.
	 */
	module_init_class(MODULE_CLASS_ANY);
	module_jettison();

	/*
	 * Finalize configuration now that all real devices have been
	 * found.  This needs to be done before the root device is
	 * selected, since finalization may create the root device.
	 */
	config_finalize();

	/*
	 * Now that autoconfiguration has completed, we can determine
	 * the root and dump devices.
	 */
	cpu_rootconf();
	cpu_dumpconf();

	/*
	 * Mount the root file system.  Keep retrying until it works:
	 * after each failure, set RB_ASKNAME and call setroot() again
	 * before the next attempt.
	 */
	do {
		domountroothook();
		if ((error = vfs_mountroot())) {
			printf("cannot mount root, error = %d\n", error);
			boothowto |= RB_ASKNAME;
			setroot(root_device,
			    (rootdev != NODEV) ? DISKPART(rootdev) : 0);
		}
	} while (error != 0);
	mountroothook_destroy();

	/*
	 * Initialise the time-of-day clock, passing the time recorded
	 * in the root filesystem (if any) for use by systems that
	 * don't have a non-volatile time-of-day device.
	 */
	inittodr(rootfstime);

	/* Mark the first mount on the list as the root file system. */
	CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
	CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;

	/*
	 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
	 * reference it.
	 */
	error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
	if (error)
		panic("cannot find root vnode, error=%d", error);
	cwdi0.cwdi_cdir = rootvnode;
	VREF(cwdi0.cwdi_cdir);
	VOP_UNLOCK(rootvnode, 0);
	cwdi0.cwdi_rdir = NULL;

	/*
	 * Now that root is mounted, we can fixup initproc's CWD
	 * info.  All other processes are kthreads, which merely
	 * share proc0's CWD info.
	 */
	initproc->p_cwdi->cwdi_cdir = rootvnode;
	VREF(initproc->p_cwdi->cwdi_cdir);
	initproc->p_cwdi->cwdi_rdir = NULL;

	/*
	 * Now can look at time, having had a chance to verify the time
	 * from the file system.  Reset l->l_rtime as it may have been
	 * munched in mi_switch() after the time got set.
	 *
	 * Note that "l" is reused as the loop cursor below and no longer
	 * refers to lwp0 afterwards.
	 */
	getmicrotime(&time);
	boottime = time;
	mutex_enter(&proclist_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		KASSERT((p->p_flag & PK_MARKER) == 0);
		mutex_enter(&p->p_smutex);
		p->p_stats->p_start = time;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			lwp_lock(l);
			memset(&l->l_rtime, 0, sizeof(l->l_rtime));
			lwp_unlock(l);
		}
		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_lock);
	binuptime(&curlwp->l_stime);

	/* Stamp every CPU's scheduler state with the current time. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_schedstate.spc_lastmod = time_second;
	}

	/* Create the pageout daemon kernel thread. */
	uvm_swap_init();
	if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout,
	    NULL, NULL, "pgdaemon"))
		panic("fork pagedaemon");

	/* Create the filesystem syncer kernel thread. */
	if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync,
	    NULL, NULL, "ioflush"))
		panic("fork syncer");

	/* Create the aiodone daemon kernel thread. */
	if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
	    uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE))
		panic("fork aiodoned");

	vmem_rehash_start();

	/* Initialize exec structures */
	exec_init(1);

	/*
	 * Okay, now we can let init(8) exec!  It's off to userland!
	 */
	start_init_exec = 1;
	wakeup(&start_init_exec);

	/* The scheduler is an infinite loop. */
	uvm_scheduler();
	/* NOTREACHED */
}
707
708static void
709check_console(struct lwp *l)
710{
711	struct nameidata nd;
712	int error;
713
714	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/console");
715	error = namei(&nd);
716	if (error == 0)
717		vrele(nd.ni_vp);
718	else if (error == ENOENT)
719		printf("warning: no /dev/console\n");
720	else
721		printf("warning: lookup /dev/console: error %d\n", error);
722}
723
/*
 * List of paths to try when searching for "init", in the order in
 * which start_init() tries them.  The NULL entry terminates the list:
 * start_init() stops iterating there (or, when prompting because of
 * RB_ASKNAME, no longer offers a default).
 */
static const char * const initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	NULL,
};
733
734/*
735 * Start the initial user process; try exec'ing each pathname in "initpaths".
736 * The program is invoked with one argument containing the boot flags.
737 */
738static void
739start_init(void *arg)
740{
741	struct lwp *l = arg;
742	struct proc *p = l->l_proc;
743	vaddr_t addr;
744	struct sys_execve_args /* {
745		syscallarg(const char *) path;
746		syscallarg(char * const *) argp;
747		syscallarg(char * const *) envp;
748	} */ args;
749	int options, i, error;
750	register_t retval[2];
751	char flags[4], *flagsp;
752	const char *path, *slash;
753	char *ucp, **uap, *arg0, *arg1 = NULL;
754	char ipath[129];
755	int ipx, len;
756
757	/*
758	 * Now in process 1.
759	 */
760	strncpy(p->p_comm, "init", MAXCOMLEN);
761
762	/*
763	 * Wait for main() to tell us that it's safe to exec.
764	 */
765	while (start_init_exec == 0)
766		(void) tsleep(&start_init_exec, PWAIT, "initexec", 0);
767
768	/*
769	 * This is not the right way to do this.  We really should
770	 * hand-craft a descriptor onto /dev/console to hand to init,
771	 * but that's a _lot_ more work, and the benefit from this easy
772	 * hack makes up for the "good is the enemy of the best" effect.
773	 */
774	check_console(l);
775
776	/*
777	 * Need just enough stack to hold the faked-up "execve()" arguments.
778	 */
779	addr = (vaddr_t)STACK_ALLOC(USRSTACK, PAGE_SIZE);
780	if (uvm_map(&p->p_vmspace->vm_map, &addr, PAGE_SIZE,
781                    NULL, UVM_UNKNOWN_OFFSET, 0,
782                    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
783		    UVM_ADV_NORMAL,
784                    UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)) != 0)
785		panic("init: couldn't allocate argument space");
786	p->p_vmspace->vm_maxsaddr = (void *)STACK_MAX(addr, PAGE_SIZE);
787
788	ipx = 0;
789	while (1) {
790		if (boothowto & RB_ASKNAME) {
791			printf("init path");
792			if (initpaths[ipx])
793				printf(" (default %s)", initpaths[ipx]);
794			printf(": ");
795			len = cngetsn(ipath, sizeof(ipath)-1);
796			if (len == 0) {
797				if (initpaths[ipx])
798					path = initpaths[ipx++];
799				else
800					continue;
801			} else {
802				ipath[len] = '\0';
803				path = ipath;
804			}
805		} else {
806			if ((path = initpaths[ipx++]) == NULL)
807				break;
808		}
809
810		ucp = (char *)USRSTACK;
811
812		/*
813		 * Construct the boot flag argument.
814		 */
815		flagsp = flags;
816		*flagsp++ = '-';
817		options = 0;
818
819		if (boothowto & RB_SINGLE) {
820			*flagsp++ = 's';
821			options = 1;
822		}
823#ifdef notyet
824		if (boothowto & RB_FASTBOOT) {
825			*flagsp++ = 'f';
826			options = 1;
827		}
828#endif
829
830		/*
831		 * Move out the flags (arg 1), if necessary.
832		 */
833		if (options != 0) {
834			*flagsp++ = '\0';
835			i = flagsp - flags;
836#ifdef DEBUG
837			printf("init: copying out flags `%s' %d\n", flags, i);
838#endif
839			arg1 = STACK_ALLOC(ucp, i);
840			ucp = STACK_MAX(arg1, i);
841			(void)copyout((void *)flags, arg1, i);
842		}
843
844		/*
845		 * Move out the file name (also arg 0).
846		 */
847		i = strlen(path) + 1;
848#ifdef DEBUG
849		printf("init: copying out path `%s' %d\n", path, i);
850#else
851		if (boothowto & RB_ASKNAME || path != initpaths[0])
852			printf("init: trying %s\n", path);
853#endif
854		arg0 = STACK_ALLOC(ucp, i);
855		ucp = STACK_MAX(arg0, i);
856		(void)copyout(path, arg0, i);
857
858		/*
859		 * Move out the arg pointers.
860		 */
861		ucp = (void *)STACK_ALIGN(ucp, ALIGNBYTES);
862		uap = (char **)STACK_ALLOC(ucp, sizeof(char *) * 3);
863		SCARG(&args, path) = arg0;
864		SCARG(&args, argp) = uap;
865		SCARG(&args, envp) = NULL;
866		slash = strrchr(path, '/');
867		if (slash)
868			(void)suword((void *)uap++,
869			    (long)arg0 + (slash + 1 - path));
870		else
871			(void)suword((void *)uap++, (long)arg0);
872		if (options != 0)
873			(void)suword((void *)uap++, (long)arg1);
874		(void)suword((void *)uap++, 0);	/* terminator */
875
876		/*
877		 * Now try to exec the program.  If can't for any reason
878		 * other than it doesn't exist, complain.
879		 */
880		error = sys_execve(l, &args, retval);
881		if (error == 0 || error == EJUSTRETURN) {
882			KERNEL_UNLOCK_LAST(l);
883			return;
884		}
885		printf("exec %s: error %d\n", path, error);
886	}
887	printf("init: not found\n");
888	panic("no init");
889}
890
891/*
892 * calculate cache size from physmem and vm_map size.
893 */
894vaddr_t
895calc_cache_size(struct vm_map *map, int pct, int va_pct)
896{
897	paddr_t t;
898
899	/* XXX should consider competing cache if any */
900	/* XXX should consider submaps */
901	t = (uintmax_t)physmem * pct / 100 * PAGE_SIZE;
902	if (map != NULL) {
903		vsize_t vsize;
904
905		vsize = vm_map_max(map) - vm_map_min(map);
906		vsize = (uintmax_t)vsize * va_pct / 100;
907		if (t > vsize) {
908			t = vsize;
909		}
910	}
911	return t;
912}
913