init_main.c revision 210226
/*-
 * Copyright (c) 1995 Terrence R. Lambert
 * All rights reserved.
 *
 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/init_main.c 210226 2010-07-18 20:57:53Z trasz $");

#include "opt_ddb.h"
#include "opt_init_path.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/sysent.h>
#include <sys/reboot.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysproto.h>
72275970Scy#include <sys/vmmeter.h> 73275970Scy#include <sys/unistd.h> 74275970Scy#include <sys/malloc.h> 75275970Scy#include <sys/conf.h> 76275970Scy#include <sys/cpuset.h> 77275970Scy 78275970Scy#include <machine/cpu.h> 79275970Scy 80275970Scy#include <security/audit/audit.h> 81275970Scy#include <security/mac/mac_framework.h> 82275970Scy 83275970Scy#include <vm/vm.h> 84275970Scy#include <vm/vm_param.h> 85275970Scy#include <vm/pmap.h> 86275970Scy#include <vm/vm_map.h> 87275970Scy#include <sys/copyright.h> 88275970Scy 89275970Scy#include <ddb/ddb.h> 90275970Scy#include <ddb/db_sym.h> 91275970Scy 92275970Scyvoid mi_startup(void); /* Should be elsewhere */ 93275970Scy 94275970Scy/* Components of the first process -- never freed. */ 95275970Scystatic struct session session0; 96275970Scystatic struct pgrp pgrp0; 97275970Scystruct proc proc0; 98275970Scystruct thread thread0 __aligned(16); 99275970Scystruct vmspace vmspace0; 100275970Scystruct proc *initproc; 101275970Scy 102275970Scyint boothowto = 0; /* initialized so that it can be patched */ 103275970ScySYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, ""); 104275970Scyint bootverbose; 105275970ScySYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, ""); 106275970Scy 107275970Scy/* 108275970Scy * This ensures that there is at least one entry so that the sysinit_set 109275970Scy * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never 110275970Scy * executed. 111275970Scy */ 112275970ScySYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); 113275970Scy 114275970Scy/* 115275970Scy * The sysinit table itself. Items are checked off as the are run. 116275970Scy * If we want to register new sysinit types, add them to newsysinit. 
117275970Scy */ 118275970ScySET_DECLARE(sysinit_set, struct sysinit); 119275970Scystruct sysinit **sysinit, **sysinit_end; 120275970Scystruct sysinit **newsysinit, **newsysinit_end; 121275970Scy 122275970Scy/* 123275970Scy * Merge a new sysinit set into the current set, reallocating it if 124275970Scy * necessary. This can only be called after malloc is running. 125275970Scy */ 126275970Scyvoid 127275970Scysysinit_add(struct sysinit **set, struct sysinit **set_end) 128275970Scy{ 129275970Scy struct sysinit **newset; 130275970Scy struct sysinit **sipp; 131275970Scy struct sysinit **xipp; 132275970Scy int count; 133275970Scy 134275970Scy count = set_end - set; 135275970Scy if (newsysinit) 136275970Scy count += newsysinit_end - newsysinit; 137275970Scy else 138275970Scy count += sysinit_end - sysinit; 139275970Scy newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); 140275970Scy if (newset == NULL) 141275970Scy panic("cannot malloc for sysinit"); 142275970Scy xipp = newset; 143275970Scy if (newsysinit) 144275970Scy for (sipp = newsysinit; sipp < newsysinit_end; sipp++) 145275970Scy *xipp++ = *sipp; 146275970Scy else 147275970Scy for (sipp = sysinit; sipp < sysinit_end; sipp++) 148275970Scy *xipp++ = *sipp; 149275970Scy for (sipp = set; sipp < set_end; sipp++) 150275970Scy *xipp++ = *sipp; 151275970Scy if (newsysinit) 152275970Scy free(newsysinit, M_TEMP); 153275970Scy newsysinit = newset; 154275970Scy newsysinit_end = newset + count; 155275970Scy} 156275970Scy 157275970Scy/* 158275970Scy * System startup; initialize the world, create process 0, mount root 159275970Scy * filesystem, and fork to create init and pagedaemon. Most of the 160275970Scy * hard work is done in the lower-level initialization routines including 161275970Scy * startup(), which does memory initialization and autoconfiguration. 162275970Scy * 163275970Scy * This allows simple addition of new kernel subsystems that require 164275970Scy * boot time initialization. 
It also allows substitution of subsystem 165275970Scy * (for instance, a scheduler, kernel profiler, or VM system) by object 166275970Scy * module. Finally, it allows for optional "kernel threads". 167275970Scy */ 168275970Scyvoid 169275970Scymi_startup(void) 170275970Scy{ 171275970Scy 172275970Scy register struct sysinit **sipp; /* system initialization*/ 173275970Scy register struct sysinit **xipp; /* interior loop of sort*/ 174275970Scy register struct sysinit *save; /* bubble*/ 175275970Scy 176275970Scy#if defined(VERBOSE_SYSINIT) 177275970Scy int last; 178275970Scy int verbose; 179275970Scy#endif 180275970Scy 181275970Scy if (sysinit == NULL) { 182275970Scy sysinit = SET_BEGIN(sysinit_set); 183275970Scy sysinit_end = SET_LIMIT(sysinit_set); 184275970Scy } 185275970Scy 186275970Scyrestart: 187275970Scy /* 188275970Scy * Perform a bubble sort of the system initialization objects by 189275970Scy * their subsystem (primary key) and order (secondary key). 190275970Scy */ 191275970Scy for (sipp = sysinit; sipp < sysinit_end; sipp++) { 192275970Scy for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { 193275970Scy if ((*sipp)->subsystem < (*xipp)->subsystem || 194275970Scy ((*sipp)->subsystem == (*xipp)->subsystem && 195275970Scy (*sipp)->order <= (*xipp)->order)) 196275970Scy continue; /* skip*/ 197275970Scy save = *sipp; 198275970Scy *sipp = *xipp; 199275970Scy *xipp = save; 200275970Scy } 201275970Scy } 202275970Scy 203275970Scy#if defined(VERBOSE_SYSINIT) 204275970Scy last = SI_SUB_COPYRIGHT; 205275970Scy verbose = 0; 206275970Scy#if !defined(DDB) 207275970Scy printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); 208275970Scy#endif 209275970Scy#endif 210275970Scy 211275970Scy /* 212275970Scy * Traverse the (now) ordered list of system initialization tasks. 213275970Scy * Perform each task, and continue on to the next task. 214275970Scy * 215275970Scy * The last item on the list is expected to be the scheduler, 216275970Scy * which will not return. 
217275970Scy */ 218275970Scy for (sipp = sysinit; sipp < sysinit_end; sipp++) { 219275970Scy 220275970Scy if ((*sipp)->subsystem == SI_SUB_DUMMY) 221275970Scy continue; /* skip dummy task(s)*/ 222275970Scy 223275970Scy if ((*sipp)->subsystem == SI_SUB_DONE) 224275970Scy continue; 225275970Scy 226275970Scy#if defined(VERBOSE_SYSINIT) 227275970Scy if ((*sipp)->subsystem > last) { 228275970Scy verbose = 1; 229275970Scy last = (*sipp)->subsystem; 230275970Scy printf("subsystem %x\n", last); 231275970Scy } 232275970Scy if (verbose) { 233275970Scy#if defined(DDB) 234275970Scy const char *name; 235275970Scy c_db_sym_t sym; 236275970Scy db_expr_t offset; 237275970Scy 238275970Scy sym = db_search_symbol((vm_offset_t)(*sipp)->func, 239275970Scy DB_STGY_PROC, &offset); 240275970Scy db_symbol_values(sym, &name, NULL); 241275970Scy if (name != NULL) 242275970Scy printf(" %s(%p)... ", name, (*sipp)->udata); 243275970Scy else 244275970Scy#endif 245275970Scy printf(" %p(%p)... ", (*sipp)->func, 246275970Scy (*sipp)->udata); 247275970Scy } 248275970Scy#endif 249275970Scy 250275970Scy /* Call function */ 251275970Scy (*((*sipp)->func))((*sipp)->udata); 252275970Scy 253275970Scy#if defined(VERBOSE_SYSINIT) 254275970Scy if (verbose) 255275970Scy printf("done.\n"); 256275970Scy#endif 257275970Scy 258275970Scy /* Check off the one we're just done */ 259275970Scy (*sipp)->subsystem = SI_SUB_DONE; 260275970Scy 261275970Scy /* Check if we've installed more sysinit items via KLD */ 262275970Scy if (newsysinit != NULL) { 263275970Scy if (sysinit != SET_BEGIN(sysinit_set)) 264275970Scy free(sysinit, M_TEMP); 265275970Scy sysinit = newsysinit; 266275970Scy sysinit_end = newsysinit_end; 267275970Scy newsysinit = NULL; 268275970Scy newsysinit_end = NULL; 269275970Scy goto restart; 270275970Scy } 271275970Scy } 272275970Scy 273275970Scy panic("Shouldn't get here!"); 274275970Scy /* NOTREACHED*/ 275275970Scy} 276275970Scy 277275970Scy 278275970Scy/* 279275970Scy 
*************************************************************************** 280275970Scy **** 281275970Scy **** The following SYSINIT's belong elsewhere, but have not yet 282275970Scy **** been moved. 283275970Scy **** 284275970Scy *************************************************************************** 285275970Scy */ 286275970Scystatic void 287275970Scyprint_caddr_t(void *data) 288275970Scy{ 289275970Scy printf("%s", (char *)data); 290275970Scy} 291275970Scy 292275970Scystatic void 293275970Scyprint_version(void *data __unused) 294275970Scy{ 295275970Scy int len; 296275970Scy 297275970Scy /* Strip a trailing newline from version. */ 298275970Scy len = strlen(version); 299275970Scy while (len > 0 && version[len - 1] == '\n') 300275970Scy len--; 301275970Scy printf("%.*s %s\n", len, version, machine); 302275970Scy} 303275970Scy 304275970ScySYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, 305275970Scy copyright); 306275970ScySYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, 307275970Scy trademark); 308275970ScySYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); 309275970Scy 310275970Scy#ifdef WITNESS 311275970Scystatic char wit_warn[] = 312275970Scy "WARNING: WITNESS option enabled, expect reduced performance.\n"; 313275970ScySYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, 314275970Scy print_caddr_t, wit_warn); 315275970ScySYSINIT(witwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 1, 316275970Scy print_caddr_t, wit_warn); 317275970Scy#endif 318275970Scy 319275970Scy#ifdef DIAGNOSTIC 320275970Scystatic char diag_warn[] = 321275970Scy "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; 322275970ScySYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, 323275970Scy print_caddr_t, diag_warn); 324275970ScySYSINIT(diagwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 2, 325275970Scy print_caddr_t, diag_warn); 326275970Scy#endif 327275970Scy 328275970Scystatic void 
329275970Scyset_boot_verbose(void *data __unused) 330275970Scy{ 331275970Scy 332275970Scy if (boothowto & RB_VERBOSE) 333275970Scy bootverbose++; 334275970Scy} 335275970ScySYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL); 336275970Scy 337275970Scystatic int 338275970Scynull_fetch_syscall_args(struct thread *td __unused, 339275970Scy struct syscall_args *sa __unused) 340275970Scy{ 341275970Scy 342275970Scy panic("null_fetch_syscall_args"); 343275970Scy} 344275970Scy 345275970Scystatic void 346275970Scynull_set_syscall_retval(struct thread *td __unused, int error __unused) 347275970Scy{ 348275970Scy 349275970Scy panic("null_set_syscall_retval"); 350275970Scy} 351275970Scy 352275970Scystruct sysentvec null_sysvec = { 353275970Scy .sv_size = 0, 354275970Scy .sv_table = NULL, 355275970Scy .sv_mask = 0, 356275970Scy .sv_sigsize = 0, 357275970Scy .sv_sigtbl = NULL, 358275970Scy .sv_errsize = 0, 359275970Scy .sv_errtbl = NULL, 360275970Scy .sv_transtrap = NULL, 361275970Scy .sv_fixup = NULL, 362275970Scy .sv_sendsig = NULL, 363275970Scy .sv_sigcode = NULL, 364275970Scy .sv_szsigcode = NULL, 365275970Scy .sv_prepsyscall = NULL, 366275970Scy .sv_name = "null", 367275970Scy .sv_coredump = NULL, 368275970Scy .sv_imgact_try = NULL, 369275970Scy .sv_minsigstksz = 0, 370275970Scy .sv_pagesize = PAGE_SIZE, 371275970Scy .sv_minuser = VM_MIN_ADDRESS, 372275970Scy .sv_maxuser = VM_MAXUSER_ADDRESS, 373275970Scy .sv_usrstack = USRSTACK, 374275970Scy .sv_psstrings = PS_STRINGS, 375275970Scy .sv_stackprot = VM_PROT_ALL, 376275970Scy .sv_copyout_strings = NULL, 377275970Scy .sv_setregs = NULL, 378275970Scy .sv_fixlimit = NULL, 379275970Scy .sv_maxssiz = NULL, 380275970Scy .sv_flags = 0, 381275970Scy .sv_set_syscall_retval = null_set_syscall_retval, 382275970Scy .sv_fetch_syscall_args = null_fetch_syscall_args, 383275970Scy .sv_syscallnames = NULL, 384275970Scy}; 385275970Scy 386275970Scy/* 387275970Scy 
*************************************************************************** 388275970Scy **** 389275970Scy **** The two following SYSINIT's are proc0 specific glue code. I am not 390275970Scy **** convinced that they can not be safely combined, but their order of 391275970Scy **** operation has been maintained as the same as the original init_main.c 392275970Scy **** for right now. 393275970Scy **** 394275970Scy **** These probably belong in init_proc.c or kern_proc.c, since they 395275970Scy **** deal with proc0 (the fork template process). 396275970Scy **** 397275970Scy *************************************************************************** 398275970Scy */ 399275970Scy/* ARGSUSED*/ 400275970Scystatic void 401275970Scyproc0_init(void *dummy __unused) 402275970Scy{ 403275970Scy struct proc *p; 404275970Scy struct thread *td; 405275970Scy vm_paddr_t pageablemem; 406275970Scy int i; 407275970Scy 408275970Scy GIANT_REQUIRED; 409275970Scy p = &proc0; 410275970Scy td = &thread0; 411275970Scy 412275970Scy /* 413275970Scy * Initialize magic number and osrel. 414275970Scy */ 415275970Scy p->p_magic = P_MAGIC; 416275970Scy p->p_osrel = osreldate; 417275970Scy 418275970Scy /* 419275970Scy * Initialize thread and process structures. 420275970Scy */ 421275970Scy procinit(); /* set up proc zone */ 422275970Scy threadinit(); /* set up UMA zones */ 423275970Scy 424275970Scy /* 425275970Scy * Initialise scheduler resources. 426275970Scy * Add scheduler specific parts to proc, thread as needed. 427275970Scy */ 428282408Scy schedinit(); /* scheduler gets its house in order */ 429275970Scy /* 430275970Scy * Initialize sleep queue hash table 431282408Scy */ 432275970Scy sleepinit(); 433282408Scy 434282408Scy /* 435275970Scy * additional VM structures 436282408Scy */ 437282408Scy vm_init2(); 438275970Scy 439282408Scy /* 440282408Scy * Create process 0 (the swapper). 
441282408Scy */ 442282408Scy LIST_INSERT_HEAD(&allproc, p, p_list); 443275970Scy LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); 444282408Scy mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); 445282408Scy p->p_pgrp = &pgrp0; 446282408Scy LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); 447282408Scy LIST_INIT(&pgrp0.pg_members); 448282408Scy LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); 449282408Scy 450282408Scy pgrp0.pg_session = &session0; 451282408Scy mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); 452282408Scy refcount_init(&session0.s_count, 1); 453282408Scy session0.s_leader = p; 454282408Scy 455282408Scy p->p_sysent = &null_sysvec; 456275970Scy p->p_flag = P_SYSTEM | P_INMEM; 457275970Scy p->p_state = PRS_NORMAL; 458275970Scy knlist_init_mtx(&p->p_klist, &p->p_mtx); 459275970Scy STAILQ_INIT(&p->p_ktr); 460275970Scy p->p_nice = NZERO; 461275970Scy td->td_tid = PID_MAX + 1; 462275970Scy td->td_state = TDS_RUNNING; 463275970Scy td->td_pri_class = PRI_TIMESHARE; 464275970Scy td->td_user_pri = PUSER; 465282408Scy td->td_base_user_pri = PUSER; 466275970Scy td->td_priority = PVM; 467275970Scy td->td_base_pri = PUSER; 468282408Scy td->td_oncpu = 0; 469282408Scy td->td_flags = TDF_INMEM|TDP_KTHREAD; 470282408Scy td->td_cpuset = cpuset_thread0(); 471282408Scy prison0.pr_cpuset = cpuset_ref(td->td_cpuset); 472282408Scy p->p_peers = 0; 473282408Scy p->p_leader = p; 474282408Scy 475282408Scy 476282408Scy strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); 477282408Scy strncpy(td->td_name, "swapper", sizeof (td->td_name)); 478282408Scy 479275970Scy callout_init(&p->p_itcallout, CALLOUT_MPSAFE); 480275970Scy callout_init_mtx(&p->p_limco, &p->p_mtx, 0); 481275970Scy callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); 482275970Scy 483275970Scy /* Create credentials. 
*/ 484275970Scy p->p_ucred = crget(); 485275970Scy p->p_ucred->cr_ngroups = 1; /* group 0 */ 486275970Scy p->p_ucred->cr_uidinfo = uifind(0); 487282408Scy p->p_ucred->cr_ruidinfo = uifind(0); 488275970Scy p->p_ucred->cr_prison = &prison0; 489282408Scy#ifdef AUDIT 490282408Scy audit_cred_kproc0(p->p_ucred); 491282408Scy#endif 492282408Scy#ifdef MAC 493282408Scy mac_cred_create_swapper(p->p_ucred); 494282408Scy#endif 495282408Scy td->td_ucred = crhold(p->p_ucred); 496282408Scy 497282408Scy /* Create sigacts. */ 498282408Scy p->p_sigacts = sigacts_alloc(); 499282408Scy 500282408Scy /* Initialize signal state for process 0. */ 501282408Scy siginit(&proc0); 502275970Scy 503275970Scy /* Create the file descriptor table. */ 504275970Scy p->p_fd = fdinit(NULL); 505275970Scy p->p_fdtol = NULL; 506275970Scy 507275970Scy /* Create the limits structures. */ 508275970Scy p->p_limit = lim_alloc(); 509275970Scy for (i = 0; i < RLIM_NLIMITS; i++) 510275970Scy p->p_limit->pl_rlimit[i].rlim_cur = 511275970Scy p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; 512275970Scy p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = 513275970Scy p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 514275970Scy p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = 515275970Scy p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 516275970Scy p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; 517275970Scy p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; 518275970Scy p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; 519275970Scy p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; 520275970Scy /* Cast to avoid overflow on i386/PAE. 
*/ 521275970Scy pageablemem = ptoa((vm_paddr_t)cnt.v_free_count); 522275970Scy p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = 523275970Scy p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; 524275970Scy p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; 525275970Scy p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; 526275970Scy p->p_cpulimit = RLIM_INFINITY; 527275970Scy 528275970Scy p->p_stats = pstats_alloc(); 529275970Scy 530275970Scy /* Allocate a prototype map so we have something to fork. */ 531275970Scy pmap_pinit0(vmspace_pmap(&vmspace0)); 532275970Scy p->p_vmspace = &vmspace0; 533275970Scy vmspace0.vm_refcnt = 1; 534275970Scy 535275970Scy /* 536275970Scy * proc0 is not expected to enter usermode, so there is no special 537275970Scy * handling for sv_minuser here, like is done for exec_new_vmspace(). 538275970Scy */ 539275970Scy vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), 540275970Scy p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); 541275970Scy 542275970Scy /*- 543275970Scy * call the init and ctor for the new thread and proc 544275970Scy * we wait to do this until all other structures 545275970Scy * are fairly sane. 546 */ 547 EVENTHANDLER_INVOKE(process_init, p); 548 EVENTHANDLER_INVOKE(thread_init, td); 549 EVENTHANDLER_INVOKE(process_ctor, p); 550 EVENTHANDLER_INVOKE(thread_ctor, td); 551 552 /* 553 * Charge root for one process. 554 */ 555 (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); 556} 557SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); 558 559/* ARGSUSED*/ 560static void 561proc0_post(void *dummy __unused) 562{ 563 struct timespec ts; 564 struct proc *p; 565 struct rusage ru; 566 struct thread *td; 567 568 /* 569 * Now we can look at the time, having had a chance to verify the 570 * time from the filesystem. Pretend that proc0 started now. 
571 */ 572 sx_slock(&allproc_lock); 573 FOREACH_PROC_IN_SYSTEM(p) { 574 microuptime(&p->p_stats->p_start); 575 PROC_SLOCK(p); 576 rufetch(p, &ru); /* Clears thread stats */ 577 PROC_SUNLOCK(p); 578 p->p_rux.rux_runtime = 0; 579 p->p_rux.rux_uticks = 0; 580 p->p_rux.rux_sticks = 0; 581 p->p_rux.rux_iticks = 0; 582 FOREACH_THREAD_IN_PROC(p, td) { 583 td->td_runtime = 0; 584 } 585 } 586 sx_sunlock(&allproc_lock); 587 PCPU_SET(switchtime, cpu_ticks()); 588 PCPU_SET(switchticks, ticks); 589 590 /* 591 * Give the ``random'' number generator a thump. 592 */ 593 nanotime(&ts); 594 srandom(ts.tv_sec ^ ts.tv_nsec); 595} 596SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); 597 598static void 599random_init(void *dummy __unused) 600{ 601 602 /* 603 * After CPU has been started we have some randomness on most 604 * platforms via get_cyclecount(). For platforms that don't 605 * we will reseed random(9) in proc0_post() as well. 606 */ 607 srandom(get_cyclecount()); 608} 609SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL); 610 611/* 612 *************************************************************************** 613 **** 614 **** The following SYSINIT's and glue code should be moved to the 615 **** respective files on a per subsystem basis. 616 **** 617 *************************************************************************** 618 */ 619 620 621/* 622 *************************************************************************** 623 **** 624 **** The following code probably belongs in another file, like 625 **** kern/init_init.c. 626 **** 627 *************************************************************************** 628 */ 629 630/* 631 * List of paths to try when searching for "init". 
632 */ 633static char init_path[MAXPATHLEN] = 634#ifdef INIT_PATH 635 __XSTRING(INIT_PATH); 636#else 637 "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init:/stand/sysinstall"; 638#endif 639SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, 640 "Path used to search the init process"); 641 642/* 643 * Shutdown timeout of init(8). 644 * Unused within kernel, but used to control init(8), hence do not remove. 645 */ 646#ifndef INIT_SHUTDOWN_TIMEOUT 647#define INIT_SHUTDOWN_TIMEOUT 120 648#endif 649static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; 650SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, 651 CTLFLAG_RW, &init_shutdown_timeout, 0, ""); 652 653/* 654 * Start the initial user process; try exec'ing each pathname in init_path. 655 * The program is invoked with one argument containing the boot flags. 656 */ 657static void 658start_init(void *dummy) 659{ 660 vm_offset_t addr; 661 struct execve_args args; 662 int options, error; 663 char *var, *path, *next, *s; 664 char *ucp, **uap, *arg0, *arg1; 665 struct thread *td; 666 struct proc *p; 667 668 mtx_lock(&Giant); 669 670 GIANT_REQUIRED; 671 672 td = curthread; 673 p = td->td_proc; 674 675 vfs_mountroot(); 676 677 /* 678 * Need just enough stack to hold the faked-up "execve()" arguments. 
679 */ 680 addr = p->p_sysent->sv_usrstack - PAGE_SIZE; 681 if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 682 FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) 683 panic("init: couldn't allocate argument space"); 684 p->p_vmspace->vm_maxsaddr = (caddr_t)addr; 685 p->p_vmspace->vm_ssize = 1; 686 687 if ((var = getenv("init_path")) != NULL) { 688 strlcpy(init_path, var, sizeof(init_path)); 689 freeenv(var); 690 } 691 692 for (path = init_path; *path != '\0'; path = next) { 693 while (*path == ':') 694 path++; 695 if (*path == '\0') 696 break; 697 for (next = path; *next != '\0' && *next != ':'; next++) 698 /* nothing */ ; 699 if (bootverbose) 700 printf("start_init: trying %.*s\n", (int)(next - path), 701 path); 702 703 /* 704 * Move out the boot flag argument. 705 */ 706 options = 0; 707 ucp = (char *)p->p_sysent->sv_usrstack; 708 (void)subyte(--ucp, 0); /* trailing zero */ 709 if (boothowto & RB_SINGLE) { 710 (void)subyte(--ucp, 's'); 711 options = 1; 712 } 713#ifdef notyet 714 if (boothowto & RB_FASTBOOT) { 715 (void)subyte(--ucp, 'f'); 716 options = 1; 717 } 718#endif 719 720#ifdef BOOTCDROM 721 (void)subyte(--ucp, 'C'); 722 options = 1; 723#endif 724 725 if (options == 0) 726 (void)subyte(--ucp, '-'); 727 (void)subyte(--ucp, '-'); /* leading hyphen */ 728 arg1 = ucp; 729 730 /* 731 * Move out the file name (also arg 0). 732 */ 733 (void)subyte(--ucp, 0); 734 for (s = next - 1; s >= path; s--) 735 (void)subyte(--ucp, *s); 736 arg0 = ucp; 737 738 /* 739 * Move out the arg pointers. 740 */ 741 uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1)); 742 (void)suword((caddr_t)--uap, (long)0); /* terminator */ 743 (void)suword((caddr_t)--uap, (long)(intptr_t)arg1); 744 (void)suword((caddr_t)--uap, (long)(intptr_t)arg0); 745 746 /* 747 * Point at the arguments. 748 */ 749 args.fname = arg0; 750 args.argv = uap; 751 args.envv = NULL; 752 753 /* 754 * Now try to exec the program. If can't for any reason 755 * other than it doesn't exist, complain. 
756 * 757 * Otherwise, return via fork_trampoline() all the way 758 * to user mode as init! 759 */ 760 if ((error = execve(td, &args)) == 0) { 761 mtx_unlock(&Giant); 762 return; 763 } 764 if (error != ENOENT) 765 printf("exec %.*s: error %d\n", (int)(next - path), 766 path, error); 767 } 768 printf("init: not found in path %s\n", init_path); 769 panic("no init"); 770} 771 772/* 773 * Like kproc_create(), but runs in it's own address space. 774 * We do this early to reserve pid 1. 775 * 776 * Note special case - do not make it runnable yet. Other work 777 * in progress will change this more. 778 */ 779static void 780create_init(const void *udata __unused) 781{ 782 struct ucred *newcred, *oldcred; 783 int error; 784 785 error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc); 786 if (error) 787 panic("cannot fork init: %d\n", error); 788 KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); 789 /* divorce init's credentials from the kernel's */ 790 newcred = crget(); 791 PROC_LOCK(initproc); 792 initproc->p_flag |= P_SYSTEM | P_INMEM; 793 oldcred = initproc->p_ucred; 794 crcopy(newcred, oldcred); 795#ifdef MAC 796 mac_cred_create_init(newcred); 797#endif 798#ifdef AUDIT 799 audit_cred_proc1(newcred); 800#endif 801 initproc->p_ucred = newcred; 802 PROC_UNLOCK(initproc); 803 crfree(oldcred); 804 cred_update_thread(FIRST_THREAD_IN_PROC(initproc)); 805 cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); 806} 807SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); 808 809/* 810 * Make it runnable now. 811 */ 812static void 813kick_init(const void *udata __unused) 814{ 815 struct thread *td; 816 817 td = FIRST_THREAD_IN_PROC(initproc); 818 thread_lock(td); 819 TD_SET_CAN_RUN(td); 820 sched_add(td, SRQ_BORING); 821 thread_unlock(td); 822} 823SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL); 824