init_main.c revision 24101
1/*
2 * Copyright (c) 1995 Terrence R. Lambert
3 * All rights reserved.
4 *
5 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 *    must display the following acknowledgement:
23 *	This product includes software developed by the University of
24 *	California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 *    may be used to endorse or promote products derived from this software
27 *    without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42 * $Id: init_main.c,v 1.58 1997/03/01 17:49:09 wosch Exp $
43 */
44
45#include "opt_rlimit.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/file.h>
50#include <sys/filedesc.h>
51#include <sys/kernel.h>
52#include <sys/mount.h>
53#include <sys/sysctl.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/systm.h>
58#include <sys/vnode.h>
59#include <sys/sysent.h>
60#include <sys/reboot.h>
61#include <sys/sysproto.h>
62#include <sys/vmmeter.h>
63
64#include <machine/cpu.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/vm_prot.h>
69#include <sys/lock.h>
70#include <vm/pmap.h>
71#include <vm/vm_map.h>
72#include <sys/user.h>
73#include <sys/copyright.h>
74
75extern struct linker_set	sysinit_set;	/* XXX */
76
77extern void __main __P((void));
78extern void main __P((void *framep));
79
80/* Components of the first process -- never freed. */
81static struct session session0;
82static struct pgrp pgrp0;
83struct	proc proc0;
84static struct pcred cred0;
85static struct filedesc0 filedesc0;
86static struct plimit limit0;
87static struct vmspace vmspace0;
88struct	proc *curproc = &proc0;
89struct	proc *initproc;
90
91int cmask = CMASK;
92extern	struct user *proc0paddr;
93
94struct	vnode *rootvp;
95int	boothowto;
96
97struct	timeval boottime;
98SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime,
99	CTLFLAG_RW, &boottime, timeval, "");
100
101struct	timeval runtime;
102
103/*
104 * Promiscuous argument pass for start_init()
105 *
106 * This is a kludge because we use a return from main() rather than a call
107 * to a new routine in locore.s to kick the kernel alive from locore.s.
108 */
109static void	*init_framep;
110
111
#if __GNUC__ >= 2
/*
 * Empty __main(); only defined when building with GCC 2 or later.
 * NOTE(review): presumably satisfies the call GCC emits from main() --
 * confirm against the compiler in use.
 */
void
__main()
{
}
#endif
115
116
117/*
118 * This ensures that there is at least one entry so that the sysinit_set
119 * symbol is not undefined.  A sybsystem ID of SI_SUB_DUMMY is never
120 * executed.
121 */
122SYSINIT(placeholder, SI_SUB_DUMMY,SI_ORDER_ANY, NULL, NULL)
123
124
125/*
126 * System startup; initialize the world, create process 0, mount root
127 * filesystem, and fork to create init and pagedaemon.  Most of the
128 * hard work is done in the lower-level initialization routines including
129 * startup(), which does memory initialization and autoconfiguration.
130 *
131 * This allows simple addition of new kernel subsystems that require
132 * boot time initialization.  It also allows substitution of subsystem
133 * (for instance, a scheduler, kernel profiler, or VM system) by object
134 * module.  Finally, it allows for optional "kernel threads", like an LFS
135 * cleaner.
136 */
137void
138main(framep)
139	void *framep;
140{
141
142	register struct sysinit **sipp;		/* system initialization*/
143	register struct sysinit **xipp;		/* interior loop of sort*/
144	register struct sysinit *save;		/* bubble*/
145	int			rval[2];	/* SI_TYPE_KTHREAD support*/
146
147	/*
148	 * Save the locore.s frame pointer for start_init().
149	 */
150	init_framep = framep;
151
152	/*
153	 * Perform a bubble sort of the system initialization objects by
154	 * their subsystem (primary key) and order (secondary key).
155	 *
156	 * Since some things care about execution order, this is the
157	 * operation which ensures continued function.
158	 */
159	for( sipp = (struct sysinit **)sysinit_set.ls_items; *sipp; sipp++) {
160		for( xipp = sipp + 1; *xipp; xipp++) {
161			if( (*sipp)->subsystem < (*xipp)->subsystem ||
162			    ( (*sipp)->subsystem == (*xipp)->subsystem &&
163			      (*sipp)->order < (*xipp)->order))
164				continue;	/* skip*/
165			save = *sipp;
166			*sipp = *xipp;
167			*xipp = save;
168		}
169	}
170
171	/*
172	 * Traverse the (now) ordered list of system initialization tasks.
173	 * Perform each task, and continue on to the next task.
174	 *
175	 * The last item on the list is expected to be the scheduler,
176	 * which will not return.
177	 */
178	for( sipp = (struct sysinit **)sysinit_set.ls_items; *sipp; sipp++) {
179		if( (*sipp)->subsystem == SI_SUB_DUMMY)
180			continue;	/* skip dummy task(s)*/
181
182		switch( (*sipp)->type) {
183		case SI_TYPE_DEFAULT:
184			/* no special processing*/
185			(*((*sipp)->func))( (*sipp)->udata);
186			break;
187
188		case SI_TYPE_KTHREAD:
189			/* kernel thread*/
190			if (fork(&proc0, NULL, rval))
191				panic("fork kernel process");
192			if (rval[1]) {
193				(*((*sipp)->func))( (*sipp)->udata);
194				/*
195				 * The call to start "init" returns
196				 * here after the scheduler has been
197				 * started, and returns to the caller
198				 * in i386/i386/locore.s.  This is a
199				 * necessary part of initialization
200				 * and is rather non-obvious.
201				 *
202				 * No other "kernel threads" should
203				 * return here.  Call panic() instead.
204				 */
205				return;
206			}
207			break;
208
209		default:
210			panic( "init_main: unrecognized init type");
211		}
212	}
213
214	/* NOTREACHED*/
215}
216
217
218/*
219 * Start a kernel process.  This is called after a fork() call in
220 * main() in the file kern/init_main.c.
221 *
222 * This function is used to start "internal" daemons.
223 */
224/* ARGSUSED*/
225void
226kproc_start(udata)
227	void *udata;
228{
229	struct kproc_desc	*kp = udata;
230	struct proc		*p = curproc;
231
232	/* save a global descriptor, if desired*/
233	if( kp->global_procpp != NULL)
234		*kp->global_procpp	= p;
235
236	/* this is a non-swapped system process*/
237	p->p_flag |= P_INMEM | P_SYSTEM;
238
239	/* set up arg0 for 'ps', et al*/
240	strcpy( p->p_comm, kp->arg0);
241
242	/* call the processes' main()...*/
243	(*kp->func)();
244
245	/* NOTREACHED */
246	panic("kproc_start: %s", kp->arg0);
247}
248
249
250/*
251 ***************************************************************************
252 ****
253 **** The following SYSINIT's belong elsewhere, but have not yet
254 **** been moved.
255 ****
256 ***************************************************************************
257 */
#ifdef OMIT
/*
 * Compiled out unless OMIT is defined.  Swap initialization is handled
 * by vfs_mountroot (bad idea) at this time... it should be done the
 * same way as in 4.4Lite2.
 */
SYSINIT(swapinit, SI_SUB_SWAP, SI_ORDER_FIRST, swapinit, NULL)
#endif	/* OMIT */
265
266static void print_caddr_t __P((void *data));
267static void
268print_caddr_t(data)
269	void *data;
270{
271	printf("%s", (char *)data);
272}
273SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
274
275
276/*
277 ***************************************************************************
278 ****
 **** The following two SYSINITs are proc0-specific glue code.  I am not
 **** convinced that they cannot be safely combined, but their order of
 **** operation has been kept the same as in the original init_main.c
 **** for now.
283 ****
284 **** These probably belong in init_proc.c or kern_proc.c, since they
285 **** deal with proc0 (the fork template process).
286 ****
287 ***************************************************************************
288 */
289/* ARGSUSED*/
290static void proc0_init __P((void *dummy));
291static void
292proc0_init(dummy)
293	void *dummy;
294{
295	register struct proc		*p;
296	register struct filedesc0	*fdp;
297	register unsigned i;
298
299	/*
300	 * Initialize the current process pointer (curproc) before
301	 * any possible traps/probes to simplify trap processing.
302	 */
303	p = &proc0;
304	curproc = p;			/* XXX redundant*/
305
306	/*
307	 * Initialize process and pgrp structures.
308	 */
309	procinit();
310
311	/*
312	 * Initialize sleep queue hash table
313	 */
314	sleepinit();
315
316	/*
317	 * Create process 0 (the swapper).
318	 */
319	LIST_INSERT_HEAD(&allproc, p, p_list);
320	p->p_pgrp = &pgrp0;
321	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
322	LIST_INIT(&pgrp0.pg_members);
323	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
324
325	pgrp0.pg_session = &session0;
326	session0.s_count = 1;
327	session0.s_leader = p;
328
329	p->p_sysent = &aout_sysvec;
330
331	p->p_flag = P_INMEM | P_SYSTEM;
332	p->p_stat = SRUN;
333	p->p_nice = NZERO;
334	p->p_rtprio.type = RTP_PRIO_NORMAL;
335	p->p_rtprio.prio = 0;
336
337	bcopy("swapper", p->p_comm, sizeof ("swapper"));
338
339	/* Create credentials. */
340	cred0.p_refcnt = 1;
341	p->p_cred = &cred0;
342	p->p_ucred = crget();
343	p->p_ucred->cr_ngroups = 1;	/* group 0 */
344
345	/* Create the file descriptor table. */
346	fdp = &filedesc0;
347	p->p_fd = &fdp->fd_fd;
348	fdp->fd_fd.fd_refcnt = 1;
349	fdp->fd_fd.fd_cmask = cmask;
350	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
351	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
352	fdp->fd_fd.fd_nfiles = NDFILE;
353
354	/* Create the limits structures. */
355	p->p_limit = &limit0;
356	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
357		limit0.pl_rlimit[i].rlim_cur =
358		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
359	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
360	    limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
361	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
362	    limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
363	i = ptoa(cnt.v_free_count);
364	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
365	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
366	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
367	limit0.p_refcnt = 1;
368
369	/* Allocate a prototype map so we have something to fork. */
370	p->p_vmspace = &vmspace0;
371	vmspace0.vm_refcnt = 1;
372	pmap_pinit(&vmspace0.vm_pmap);
373	vm_map_init(&vmspace0.vm_map, round_page(VM_MIN_ADDRESS),
374	    trunc_page(VM_MAXUSER_ADDRESS), TRUE);
375	vmspace0.vm_map.pmap = &vmspace0.vm_pmap;
376	p->p_addr = proc0paddr;				/* XXX */
377
378#define INCOMPAT_LITES2
379#ifdef INCOMPAT_LITES2
380	/*
381	 * proc0 needs to have a coherent frame base, too.
382	 * This probably makes the identical call for the init proc
383	 * that happens later unnecessary since it should inherit
384	 * it during the fork.
385	 */
386	cpu_set_init_frame(p, init_framep);			/* XXX! */
387#endif	/* INCOMPAT_LITES2*/
388
389	/*
390	 * We continue to place resource usage info and signal
391	 * actions in the user struct so they're pageable.
392	 */
393	p->p_stats = &p->p_addr->u_stats;
394	p->p_sigacts = &p->p_addr->u_sigacts;
395
396	/*
397	 * Charge root for one process.
398	 */
399	(void)chgproccnt(0, 1);
400}
401SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
402
403/* ARGSUSED*/
404static void proc0_post __P((void *dummy));
405static void
406proc0_post(dummy)
407	void *dummy;
408{
409	struct timeval tv;
410
411	/*
412	 * Now can look at time, having had a chance to verify the time
413	 * from the file system.  Reset p->p_rtime as it may have been
414	 * munched in mi_switch() after the time got set.
415	 */
416	gettime(&boottime);
417	proc0.p_stats->p_start = runtime = mono_time = boottime;
418	proc0.p_rtime.tv_sec = proc0.p_rtime.tv_usec = 0;
419
420	/*
421	 * Give the ``random'' number generator a thump.
422	 */
423	microtime(&tv);
424	srandom(tv.tv_sec ^ tv.tv_usec);
425
426	/* Initialize signal state for process 0. */
427	siginit(&proc0);
428}
429SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
430
431
432
433
434/*
435 ***************************************************************************
436 ****
437 **** The following SYSINIT's and glue code should be moved to the
438 **** respective files on a per subsystem basis.
439 ****
440 ***************************************************************************
441 */
442/* ARGSUSED*/
443static void sched_setup __P((void *dummy));
444static void
445sched_setup(dummy)
446	void *dummy;
447{
448	/* Kick off timeout driven events by calling first time. */
449	roundrobin(NULL);
450	schedcpu(NULL);
451}
452SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)
453
454/* ARGSUSED*/
455static void xxx_vfs_mountroot __P((void *fsnamep));
456static void
457xxx_vfs_mountroot(fsnamep)
458	void *fsnamep;
459{
460	/* Mount the root file system. */
461	if (vfs_mountrootfs(*((char **) fsnamep)))
462		panic("cannot mount root");
463}
464SYSINIT(mountroot, SI_SUB_ROOT, SI_ORDER_FIRST, xxx_vfs_mountroot, &mountrootfsname)
465
466/* ARGSUSED*/
467static void xxx_vfs_root_fdtab __P((void *dummy));
468static void
469xxx_vfs_root_fdtab(dummy)
470	void *dummy;
471{
472	register struct filedesc0	*fdp = &filedesc0;
473
474	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
475	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
476		panic("cannot find root vnode");
477	fdp->fd_fd.fd_cdir = rootvnode;
478	VREF(fdp->fd_fd.fd_cdir);
479	VOP_UNLOCK(rootvnode, 0, &proc0);
480	fdp->fd_fd.fd_rdir = NULL;
481}
482SYSINIT(retrofit, SI_SUB_ROOT_FDTAB, SI_ORDER_FIRST, xxx_vfs_root_fdtab, NULL)
483
484
485/*
486 ***************************************************************************
487 ****
488 **** The following code probably belongs in another file, like
489 **** kern/init_init.c.  It is here for two reasons only:
490 ****
491 ****	1)	This code returns to startup the system; this is
492 ****		abnormal for a kernel thread.
 ****	2)	This code promiscuously uses init_framep
494 ****
495 ***************************************************************************
496 */
497
498static void kthread_init __P((void *dummy));
499SYSINIT_KT(init,SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kthread_init, NULL)
500
501
502static void start_init __P((struct proc *p, void *framep));
503
504/* ARGSUSED*/
505static void
506kthread_init(dummy)
507	void *dummy;
508{
509
510	/* Create process 1 (init(8)). */
511	start_init(curproc, init_framep);
512
513	/*
514	 * This is the only kernel thread allowed to return yo the
515	 * caller!!!
516	 */
517	return;
518}
519
520
/*
 * Candidate pathnames for "init".  start_init() tries them in the
 * order listed and stops at the NULL sentinel.
 */
static char *initpaths[] = {
	"/sbin/init",
	"/sbin/oinit",
	"/sbin/init.bak",
	"/stand/sysinstall",
	NULL,			/* end of list */
};
531
532/*
533 * Start the initial user process; try exec'ing each pathname in "initpaths".
534 * The program is invoked with one argument containing the boot flags.
535 */
536static void
537start_init(p, framep)
538	struct proc *p;
539	void *framep;
540{
541	vm_offset_t addr;
542	struct execve_args args;
543	int options, i, retval[2], error;
544	char **pathp, *path, *ucp, **uap, *arg0, *arg1;
545
546	initproc = p;
547
548	/*
549	 * We need to set the system call frame as if we were entered through
550	 * a syscall() so that when we call execve() below, it will be able
551	 * to set the entry point (see setregs) when it tries to exec.  The
552	 * startup code in "locore.s" has allocated space for the frame and
553	 * passed a pointer to that space as main's argument.
554	 */
555	cpu_set_init_frame(p, framep);
556
557	/*
558	 * Need just enough stack to hold the faked-up "execve()" arguments.
559	 */
560	addr = trunc_page(VM_MAXUSER_ADDRESS - PAGE_SIZE);
561	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
562		panic("init: couldn't allocate argument space");
563	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
564	p->p_vmspace->vm_ssize = 1;
565
566	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
567		/*
568		 * Move out the boot flag argument.
569		 */
570		options = 0;
571		ucp = (char *)USRSTACK;
572		(void)subyte(--ucp, 0);		/* trailing zero */
573		if (boothowto & RB_SINGLE) {
574			(void)subyte(--ucp, 's');
575			options = 1;
576		}
577#ifdef notyet
578                if (boothowto & RB_FASTBOOT) {
579			(void)subyte(--ucp, 'f');
580			options = 1;
581		}
582#endif
583
584#ifdef BOOTCDROM
585		(void)subyte(--ucp, 'C');
586		options = 1;
587#endif
588
589#if defined(DEVFS) && defined(DEVFS_ROOT)
590		(void)subyte(--ucp, 'd');
591		options = 1;
592#endif
593		if (options == 0)
594			(void)subyte(--ucp, '-');
595		(void)subyte(--ucp, '-');		/* leading hyphen */
596		arg1 = ucp;
597
598		/*
599		 * Move out the file name (also arg 0).
600		 */
601		for (i = strlen(path) + 1; i >= 0; i--)
602			(void)subyte(--ucp, path[i]);
603		arg0 = ucp;
604
605		/*
606		 * Move out the arg pointers.
607		 */
608		uap = (char **)((int)ucp & ~(NBPW-1));
609		(void)suword((caddr_t)--uap, 0);	/* terminator */
610		(void)suword((caddr_t)--uap, (int)arg1);
611		(void)suword((caddr_t)--uap, (int)arg0);
612
613		/*
614		 * Point at the arguments.
615		 */
616		args.fname = arg0;
617		args.argv = uap;
618		args.envv = NULL;
619
620		/*
621		 * Now try to exec the program.  If can't for any reason
622		 * other than it doesn't exist, complain.
623		 *
624		 * Otherwise return to main() which returns to btext
625		 * which completes the system startup.
626		 */
627		if ((error = execve(p, &args, &retval[0])) == 0)
628			return;
629		if (error != ENOENT)
630			printf("exec %s: error %d\n", path, error);
631	}
632	printf("init: not found\n");
633	panic("no init");
634}
635