init_main.c revision 206142
1109864Sjeff/*- 2113357Sjeff * Copyright (c) 1995 Terrence R. Lambert 3109864Sjeff * All rights reserved. 4109864Sjeff * 5109864Sjeff * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 6109864Sjeff * The Regents of the University of California. All rights reserved. 7109864Sjeff * (c) UNIX System Laboratories, Inc. 8109864Sjeff * All or some portions of this file are derived from material licensed 9109864Sjeff * to the University of California by American Telephone and Telegraph 10109864Sjeff * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11109864Sjeff * the permission of UNIX System Laboratories, Inc. 12109864Sjeff * 13109864Sjeff * Redistribution and use in source and binary forms, with or without 14109864Sjeff * modification, are permitted provided that the following conditions 15109864Sjeff * are met: 16109864Sjeff * 1. Redistributions of source code must retain the above copyright 17109864Sjeff * notice, this list of conditions and the following disclaimer. 18109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 19109864Sjeff * notice, this list of conditions and the following disclaimer in the 20109864Sjeff * documentation and/or other materials provided with the distribution. 21109864Sjeff * 3. All advertising materials mentioning features or use of this software 22109864Sjeff * must display the following acknowledgement: 23109864Sjeff * This product includes software developed by the University of 24109864Sjeff * California, Berkeley and its contributors. 25109864Sjeff * 4. Neither the name of the University nor the names of its contributors 26109864Sjeff * may be used to endorse or promote products derived from this software 27116182Sobrien * without specific prior written permission. 28116182Sobrien * 29116182Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30109864Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31109864Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32109864Sjeff * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33109864Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34109864Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35109864Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36109864Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37112966Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38122038Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39109864Sjeff * SUCH DAMAGE. 40109864Sjeff * 41109864Sjeff * @(#)init_main.c 8.9 (Berkeley) 1/21/94 42109864Sjeff */ 43109864Sjeff 44109864Sjeff#include <sys/cdefs.h> 45109864Sjeff__FBSDID("$FreeBSD: head/sys/kern/init_main.c 206142 2010-04-03 19:07:05Z alc $"); 46109864Sjeff 47109864Sjeff#include "opt_ddb.h" 48109864Sjeff#include "opt_init_path.h" 49109864Sjeff 50109864Sjeff#include <sys/param.h> 51109864Sjeff#include <sys/kernel.h> 52109864Sjeff#include <sys/exec.h> 53109864Sjeff#include <sys/file.h> 54121790Sjeff#include <sys/filedesc.h> 55109864Sjeff#include <sys/jail.h> 56113357Sjeff#include <sys/ktr.h> 57113357Sjeff#include <sys/lock.h> 58109864Sjeff#include <sys/mount.h> 59109864Sjeff#include <sys/mutex.h> 60109864Sjeff#include <sys/syscallsubr.h> 61109864Sjeff#include <sys/sysctl.h> 62109864Sjeff#include <sys/proc.h> 63109864Sjeff#include <sys/resourcevar.h> 64109864Sjeff#include <sys/systm.h> 65109864Sjeff#include <sys/signalvar.h> 66113357Sjeff#include <sys/vnode.h> 67113357Sjeff#include <sys/sysent.h> 68113357Sjeff#include <sys/reboot.h> 69113357Sjeff#include <sys/sched.h> 70113357Sjeff#include <sys/sx.h> 71116365Sjeff#include <sys/sysproto.h> 72113357Sjeff#include <sys/vmmeter.h> 73113357Sjeff#include <sys/unistd.h> 74111857Sjeff#include <sys/malloc.h> 75113357Sjeff#include <sys/conf.h> 76111857Sjeff#include <sys/cpuset.h> 77116069Sjeff 78123487Sjeff#include <machine/cpu.h> 79116069Sjeff 80123487Sjeff#include <security/audit/audit.h> 81116069Sjeff#include <security/mac/mac_framework.h> 82116069Sjeff 83109864Sjeff#include <vm/vm.h> 84109864Sjeff#include <vm/vm_param.h> 85109864Sjeff#include <vm/pmap.h> 86109864Sjeff#include <vm/vm_map.h> 87109864Sjeff#include <sys/copyright.h> 88109864Sjeff 89109864Sjeff#include <ddb/ddb.h> 90109864Sjeff#include <ddb/db_sym.h> 91109864Sjeff 92109864Sjeffvoid mi_startup(void); /* Should be elsewhere */ 93109864Sjeff 94109864Sjeff/* Components of the first process -- never freed. */ 95113357Sjeffstatic struct session session0; 96110260Sjeffstatic struct pgrp pgrp0; 97109864Sjeffstruct proc proc0; 98109864Sjeffstruct thread thread0 __aligned(16); 99109864Sjeffstruct vmspace vmspace0; 100109864Sjeffstruct proc *initproc; 101109864Sjeff 102109864Sjeffint boothowto = 0; /* initialized so that it can be patched */ 103110260SjeffSYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, ""); 104121790Sjeffint bootverbose; 105109864SjeffSYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, ""); 106121790Sjeff 107122158Sjeff/* 108121790Sjeff * This ensures that there is at least one entry so that the sysinit_set 109109864Sjeff * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never 110110645Sjeff * executed. 111110645Sjeff */ 112109864SjeffSYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); 113109864Sjeff 114110645Sjeff/* 115109864Sjeff * The sysinit table itself. Items are checked off as the are run. 116109864Sjeff * If we want to register new sysinit types, add them to newsysinit. 117109864Sjeff */ 118109864SjeffSET_DECLARE(sysinit_set, struct sysinit); 119109864Sjeffstruct sysinit **sysinit, **sysinit_end; 120109864Sjeffstruct sysinit **newsysinit, **newsysinit_end; 121110267Sjeff 122109864Sjeff/* 123109864Sjeff * Merge a new sysinit set into the current set, reallocating it if 124109864Sjeff * necessary. This can only be called after malloc is running. 125109864Sjeff */ 126109864Sjeffvoid 127109864Sjeffsysinit_add(struct sysinit **set, struct sysinit **set_end) 128109864Sjeff{ 129109864Sjeff struct sysinit **newset; 130109864Sjeff struct sysinit **sipp; 131116642Sjeff struct sysinit **xipp; 132116642Sjeff int count; 133116642Sjeff 134116642Sjeff count = set_end - set; 135111857Sjeff if (newsysinit) 136111857Sjeff count += newsysinit_end - newsysinit; 137116642Sjeff else 138111857Sjeff count += sysinit_end - sysinit; 139109864Sjeff newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); 140111857Sjeff if (newset == NULL) 141121869Sjeff panic("cannot malloc for sysinit"); 142121869Sjeff xipp = newset; 143116642Sjeff if (newsysinit) 144113357Sjeff for (sipp = newsysinit; sipp < newsysinit_end; sipp++) 145116642Sjeff *xipp++ = *sipp; 146109864Sjeff else 147109864Sjeff for (sipp = sysinit; sipp < sysinit_end; sipp++) 148111857Sjeff *xipp++ = *sipp; 149109864Sjeff for (sipp = set; sipp < set_end; sipp++) 150110645Sjeff *xipp++ = *sipp; 151110645Sjeff if (newsysinit) 152121868Sjeff free(newsysinit, M_TEMP); 153116365Sjeff newsysinit = newset; 154111857Sjeff newsysinit_end = newset + count; 155109864Sjeff} 156121126Sjeff 157121868Sjeff/* 158116365Sjeff * System startup; initialize the world, create process 0, mount root 159116365Sjeff * filesystem, and fork to create init and pagedaemon. Most of the 160121126Sjeff * hard work is done in the lower-level initialization routines including 161111857Sjeff * startup(), which does memory initialization and autoconfiguration. 162109864Sjeff * 163109864Sjeff * This allows simple addition of new kernel subsystems that require 164109864Sjeff * boot time initialization. It also allows substitution of subsystem 165109864Sjeff * (for instance, a scheduler, kernel profiler, or VM system) by object 166109864Sjeff * module. Finally, it allows for optional "kernel threads". 167109864Sjeff */ 168109864Sjeffvoid 169112966Sjeffmi_startup(void) 170112966Sjeff{ 171121871Sjeff 172109864Sjeff register struct sysinit **sipp; /* system initialization*/ 173113357Sjeff register struct sysinit **xipp; /* interior loop of sort*/ 174113357Sjeff register struct sysinit *save; /* bubble*/ 175125299Sjeff 176121871Sjeff#if defined(VERBOSE_SYSINIT) 177111857Sjeff int last; 178109864Sjeff int verbose; 179112966Sjeff#endif 180121871Sjeff 181109864Sjeff if (sysinit == NULL) { 182109864Sjeff sysinit = SET_BEGIN(sysinit_set); 183109864Sjeff sysinit_end = SET_LIMIT(sysinit_set); 184109864Sjeff } 185109864Sjeff 186113357Sjeffrestart: 187113357Sjeff /* 188113417Sjeff * Perform a bubble sort of the system initialization objects by 189127278Sobrien * their subsystem (primary key) and order (secondary key). 190121107Sjeff */ 191109864Sjeff for (sipp = sysinit; sipp < sysinit_end; sipp++) { 192109864Sjeff for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { 193109864Sjeff if ((*sipp)->subsystem < (*xipp)->subsystem || 194109864Sjeff ((*sipp)->subsystem == (*xipp)->subsystem && 195109864Sjeff (*sipp)->order <= (*xipp)->order)) 196109864Sjeff continue; /* skip*/ 197109864Sjeff save = *sipp; 198109864Sjeff *sipp = *xipp; 199112971Sjeff *xipp = save; 200109864Sjeff } 201109864Sjeff } 202109864Sjeff 203113357Sjeff#if defined(VERBOSE_SYSINIT) 204109864Sjeff last = SI_SUB_COPYRIGHT; 205109864Sjeff verbose = 0; 206113357Sjeff#if !defined(DDB) 207113357Sjeff printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); 208113357Sjeff#endif 209113357Sjeff#endif 210121896Sjeff 211113357Sjeff /* 212121869Sjeff * Traverse the (now) ordered list of system initialization tasks. 213113357Sjeff * Perform each task, and continue on to the next task. 214110267Sjeff * 215123433Sjeff * The last item on the list is expected to be the scheduler, 216123433Sjeff * which will not return. 217123433Sjeff */ 218123433Sjeff for (sipp = sysinit; sipp < sysinit_end; sipp++) { 219125289Sjeff 220125289Sjeff if ((*sipp)->subsystem == SI_SUB_DUMMY) 221110267Sjeff continue; /* skip dummy task(s)*/ 222109864Sjeff 223109864Sjeff if ((*sipp)->subsystem == SI_SUB_DONE) 224123433Sjeff continue; 225109864Sjeff 226123433Sjeff#if defined(VERBOSE_SYSINIT) 227123433Sjeff if ((*sipp)->subsystem > last) { 228123433Sjeff verbose = 1; 229123433Sjeff last = (*sipp)->subsystem; 230123433Sjeff printf("subsystem %x\n", last); 231123433Sjeff } 232123433Sjeff if (verbose) { 233123433Sjeff#if defined(DDB) 234123433Sjeff const char *name; 235127498Smarcel c_db_sym_t sym; 236127498Smarcel db_expr_t offset; 237127498Smarcel 238123487Sjeff sym = db_search_symbol((vm_offset_t)(*sipp)->func, 239123433Sjeff DB_STGY_PROC, &offset); 240123433Sjeff db_symbol_values(sym, &name, NULL); 241123433Sjeff if (name != NULL) 242123433Sjeff printf(" %s(%p)... ", name, (*sipp)->udata); 243123433Sjeff else 244123433Sjeff#endif 245109864Sjeff printf(" %p(%p)... ", (*sipp)->func, 246109864Sjeff (*sipp)->udata); 247110028Sjeff } 248127498Smarcel#endif 249123487Sjeff 250121790Sjeff /* Call function */ 251123433Sjeff (*((*sipp)->func))((*sipp)->udata); 252123433Sjeff 253123433Sjeff#if defined(VERBOSE_SYSINIT) 254123487Sjeff if (verbose) 255123487Sjeff printf("done.\n"); 256123433Sjeff#endif 257121790Sjeff 258110028Sjeff /* Check off the one we're just done */ 259110028Sjeff (*sipp)->subsystem = SI_SUB_DONE; 260110028Sjeff 261109864Sjeff /* Check if we've installed more sysinit items via KLD */ 262112966Sjeff if (newsysinit != NULL) { 263113357Sjeff if (sysinit != SET_BEGIN(sysinit_set)) 264111857Sjeff free(sysinit, M_TEMP); 265116463Sjeff sysinit = newsysinit; 266121868Sjeff sysinit_end = newsysinit_end; 267121790Sjeff newsysinit = NULL; 268109864Sjeff newsysinit_end = NULL; 269110267Sjeff goto restart; 270121790Sjeff } 271110028Sjeff } 272122744Sjeff 273122744Sjeff panic("Shouldn't get here!"); 274122744Sjeff /* NOTREACHED*/ 275122744Sjeff} 276113357Sjeff 277113357Sjeff 278113660Sjeff/* 279110267Sjeff *************************************************************************** 280123433Sjeff **** 281121790Sjeff **** The following SYSINIT's belong elsewhere, but have not yet 282122744Sjeff **** been moved. 283123487Sjeff **** 284123487Sjeff *************************************************************************** 285121790Sjeff */ 286123433Sjeffstatic void 287121790Sjeffprint_caddr_t(void *data) 288121790Sjeff{ 289123433Sjeff printf("%s", (char *)data); 290123693Sjeff} 291123693Sjeff 292123693Sjeffstatic void 293123693Sjeffprint_version(void *data __unused) 294123693Sjeff{ 295123693Sjeff int len; 296122038Sjeff 297123693Sjeff /* Strip a trailing newline from version. */ 298123693Sjeff len = strlen(version); 299123693Sjeff while (len > 0 && version[len - 1] == '\n') 300122158Sjeff len--; 301122165Sjeff printf("%.*s %s\n", len, version, machine); 302123693Sjeff} 303121790Sjeff 304110028SjeffSYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, 305113357Sjeff copyright); 306113660SjeffSYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, 307110267Sjeff trademark); 308113660SjeffSYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); 309113357Sjeff 310112994Sjeff#ifdef WITNESS 311113660Sjeffstatic char wit_warn[] = 312112994Sjeff "WARNING: WITNESS option enabled, expect reduced performance.\n"; 313113357SjeffSYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, 314113357Sjeff print_caddr_t, wit_warn); 315122744SjeffSYSINIT(witwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 1, 316121896Sjeff print_caddr_t, wit_warn); 317123433Sjeff#endif 318121896Sjeff 319113357Sjeff#ifdef DIAGNOSTIC 320113357Sjeffstatic char diag_warn[] = 321121869Sjeff "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; 322113357SjeffSYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, 323113357Sjeff print_caddr_t, diag_warn); 324113357SjeffSYSINIT(diagwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 2, 325113357Sjeff print_caddr_t, diag_warn); 326112994Sjeff#endif 327122744Sjeff 328122744Sjeffstatic void 329122744Sjeffset_boot_verbose(void *data __unused) 330122744Sjeff{ 331123433Sjeff 332123433Sjeff if (boothowto & RB_VERBOSE) 333123433Sjeff bootverbose++; 334123433Sjeff} 335122744SjeffSYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL); 336122744Sjeff 337122744Sjeffstruct sysentvec null_sysvec = { 338122744Sjeff .sv_size = 0, 339122744Sjeff .sv_table = NULL, 340122744Sjeff .sv_mask = 0, 341122744Sjeff .sv_sigsize = 0, 342122744Sjeff .sv_sigtbl = NULL, 343123433Sjeff .sv_errsize = 0, 344123433Sjeff .sv_errtbl = NULL, 345123433Sjeff .sv_transtrap = NULL, 346123433Sjeff .sv_fixup = NULL, 347122744Sjeff .sv_sendsig = NULL, 348122744Sjeff .sv_sigcode = NULL, 349122744Sjeff .sv_szsigcode = NULL, 350122744Sjeff .sv_prepsyscall = NULL, 351113357Sjeff .sv_name = "null", 352122744Sjeff .sv_coredump = NULL, 353113357Sjeff .sv_imgact_try = NULL, 354121896Sjeff .sv_minsigstksz = 0, 355115998Sjeff .sv_pagesize = PAGE_SIZE, 356121896Sjeff .sv_minuser = VM_MIN_ADDRESS, 357121896Sjeff .sv_maxuser = VM_MAXUSER_ADDRESS, 358121896Sjeff .sv_usrstack = USRSTACK, 359113357Sjeff .sv_psstrings = PS_STRINGS, 360125289Sjeff .sv_stackprot = VM_PROT_ALL, 361123487Sjeff .sv_copyout_strings = NULL, 362123487Sjeff .sv_setregs = NULL, 363125289Sjeff .sv_fixlimit = NULL, 364125289Sjeff .sv_maxssiz = NULL 365123487Sjeff}; 366113357Sjeff 367122744Sjeff/* 368122744Sjeff *************************************************************************** 369122744Sjeff **** 370122744Sjeff **** The two following SYSINIT's are proc0 specific glue code. I am not 371113357Sjeff **** convinced that they can not be safely combined, but their order of 372113357Sjeff **** operation has been maintained as the same as the original init_main.c 373110267Sjeff **** for right now. 374113357Sjeff **** 375112994Sjeff **** These probably belong in init_proc.c or kern_proc.c, since they 376122744Sjeff **** deal with proc0 (the fork template process). 377110267Sjeff **** 378121896Sjeff *************************************************************************** 379115998Sjeff */ 380121896Sjeff/* ARGSUSED*/ 381121896Sjeffstatic void 382121896Sjeffproc0_init(void *dummy __unused) 383125289Sjeff{ 384123487Sjeff struct proc *p; 385123487Sjeff unsigned i; 386125289Sjeff struct thread *td; 387125289Sjeff 388123487Sjeff GIANT_REQUIRED; 389113357Sjeff p = &proc0; 390113357Sjeff td = &thread0; 391113357Sjeff 392113357Sjeff /* 393110267Sjeff * Initialize magic number and osrel. 394110267Sjeff */ 395113357Sjeff p->p_magic = P_MAGIC; 396113357Sjeff p->p_osrel = osreldate; 397110267Sjeff 398115998Sjeff /* 399113357Sjeff * Initialize thread and process structures. 400113357Sjeff */ 401121896Sjeff procinit(); /* set up proc zone */ 402113357Sjeff threadinit(); /* set up UMA zones */ 403110267Sjeff 404110267Sjeff /* 405113357Sjeff * Initialise scheduler resources. 406113357Sjeff * Add scheduler specific parts to proc, thread as needed. 407110267Sjeff */ 408113357Sjeff schedinit(); /* scheduler gets its house in order */ 409113357Sjeff /* 410115998Sjeff * Initialize sleep queue hash table 411113357Sjeff */ 412113357Sjeff sleepinit(); 413113357Sjeff 414113357Sjeff /* 415113357Sjeff * additional VM structures 416113357Sjeff */ 417113357Sjeff vm_init2(); 418113357Sjeff 419113357Sjeff /* 420113357Sjeff * Create process 0 (the swapper). 421113357Sjeff */ 422113357Sjeff LIST_INSERT_HEAD(&allproc, p, p_list); 423121896Sjeff LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); 424113357Sjeff mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); 425113357Sjeff p->p_pgrp = &pgrp0; 426121869Sjeff LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); 427113357Sjeff LIST_INIT(&pgrp0.pg_members); 428113357Sjeff LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); 429113357Sjeff 430113357Sjeff pgrp0.pg_session = &session0; 431110267Sjeff mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); 432110267Sjeff refcount_init(&session0.s_count, 1); 433113357Sjeff session0.s_leader = p; 434116069Sjeff 435122744Sjeff p->p_sysent = &null_sysvec; 436116069Sjeff p->p_flag = P_SYSTEM | P_INMEM; 437116069Sjeff p->p_state = PRS_NORMAL; 438116069Sjeff knlist_init_mtx(&p->p_klist, &p->p_mtx); 439116069Sjeff STAILQ_INIT(&p->p_ktr); 440116069Sjeff p->p_nice = NZERO; 441116069Sjeff td->td_tid = PID_MAX + 1; 442116069Sjeff td->td_state = TDS_RUNNING; 443116069Sjeff td->td_pri_class = PRI_TIMESHARE; 444116069Sjeff td->td_user_pri = PUSER; 445116069Sjeff td->td_base_user_pri = PUSER; 446116069Sjeff td->td_priority = PVM; 447116069Sjeff td->td_base_pri = PUSER; 448116069Sjeff td->td_oncpu = 0; 449116069Sjeff td->td_flags = TDF_INMEM|TDP_KTHREAD; 450121790Sjeff td->td_cpuset = cpuset_thread0(); 451122744Sjeff prison0.pr_cpuset = cpuset_ref(td->td_cpuset); 452116069Sjeff p->p_peers = 0; 453123487Sjeff p->p_leader = p; 454123487Sjeff 455123487Sjeff 456123487Sjeff strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); 457123487Sjeff strncpy(td->td_name, "swapper", sizeof (td->td_name)); 458123487Sjeff 459123487Sjeff callout_init(&p->p_itcallout, CALLOUT_MPSAFE); 460123487Sjeff callout_init_mtx(&p->p_limco, &p->p_mtx, 0); 461123487Sjeff callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); 462123487Sjeff 463123487Sjeff /* Create credentials. */ 464123487Sjeff p->p_ucred = crget(); 465123487Sjeff p->p_ucred->cr_ngroups = 1; /* group 0 */ 466123487Sjeff p->p_ucred->cr_uidinfo = uifind(0); 467123487Sjeff p->p_ucred->cr_ruidinfo = uifind(0); 468123487Sjeff p->p_ucred->cr_prison = &prison0; 469123487Sjeff#ifdef AUDIT 470123487Sjeff audit_cred_kproc0(p->p_ucred); 471123487Sjeff#endif 472123487Sjeff#ifdef MAC 473123487Sjeff mac_cred_create_swapper(p->p_ucred); 474123487Sjeff#endif 475123487Sjeff td->td_ucred = crhold(p->p_ucred); 476123487Sjeff 477123487Sjeff /* Create sigacts. */ 478123487Sjeff p->p_sigacts = sigacts_alloc(); 479123487Sjeff 480123487Sjeff /* Initialize signal state for process 0. */ 481123487Sjeff siginit(&proc0); 482123487Sjeff 483123487Sjeff /* Create the file descriptor table. */ 484123487Sjeff p->p_fd = fdinit(NULL); 485123487Sjeff p->p_fdtol = NULL; 486123487Sjeff 487123487Sjeff /* Create the limits structures. */ 488123487Sjeff p->p_limit = lim_alloc(); 489123487Sjeff for (i = 0; i < RLIM_NLIMITS; i++) 490123487Sjeff p->p_limit->pl_rlimit[i].rlim_cur = 491123487Sjeff p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; 492123487Sjeff p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = 493123487Sjeff p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 494123487Sjeff p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = 495123487Sjeff p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 496123487Sjeff i = ptoa(cnt.v_free_count); 497123487Sjeff p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i; 498123487Sjeff p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i; 499123487Sjeff p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3; 500123487Sjeff p->p_cpulimit = RLIM_INFINITY; 501123487Sjeff 502123487Sjeff p->p_stats = pstats_alloc(); 503123487Sjeff 504123487Sjeff /* Allocate a prototype map so we have something to fork. */ 505123487Sjeff pmap_pinit0(vmspace_pmap(&vmspace0)); 506116069Sjeff p->p_vmspace = &vmspace0; 507123487Sjeff vmspace0.vm_refcnt = 1; 508123487Sjeff 509123487Sjeff /* 510123487Sjeff * proc0 is not expected to enter usermode, so there is no special 511123487Sjeff * handling for sv_minuser here, like is done for exec_new_vmspace(). 512123487Sjeff */ 513123487Sjeff vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), 514123487Sjeff p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); 515123487Sjeff 516123487Sjeff /*- 517123487Sjeff * call the init and ctor for the new thread and proc 518123487Sjeff * we wait to do this until all other structures 519123487Sjeff * are fairly sane. 520123487Sjeff */ 521123487Sjeff EVENTHANDLER_INVOKE(process_init, p); 522123487Sjeff EVENTHANDLER_INVOKE(thread_init, td); 523123487Sjeff EVENTHANDLER_INVOKE(process_ctor, p); 524123487Sjeff EVENTHANDLER_INVOKE(thread_ctor, td); 525123487Sjeff 526123487Sjeff /* 527123487Sjeff * Charge root for one process. 528123487Sjeff */ 529123487Sjeff (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); 530123487Sjeff} 531123433SjeffSYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); 532116069Sjeff 533116069Sjeff/* ARGSUSED*/ 534116069Sjeffstatic void 535116069Sjeffproc0_post(void *dummy __unused) 536116069Sjeff{ 537116069Sjeff struct timespec ts; 538116069Sjeff struct proc *p; 539123433Sjeff struct rusage ru; 540123433Sjeff struct thread *td; 541123433Sjeff 542123433Sjeff /* 543123487Sjeff * Now we can look at the time, having had a chance to verify the 544123487Sjeff * time from the filesystem. Pretend that proc0 started now. 545123487Sjeff */ 546123487Sjeff sx_slock(&allproc_lock); 547123487Sjeff FOREACH_PROC_IN_SYSTEM(p) { 548123487Sjeff microuptime(&p->p_stats->p_start); 549123487Sjeff PROC_SLOCK(p); 550123487Sjeff rufetch(p, &ru); /* Clears thread stats */ 551123487Sjeff PROC_SUNLOCK(p); 552123487Sjeff p->p_rux.rux_runtime = 0; 553123487Sjeff p->p_rux.rux_uticks = 0; 554123487Sjeff p->p_rux.rux_sticks = 0; 555123487Sjeff p->p_rux.rux_iticks = 0; 556123487Sjeff FOREACH_THREAD_IN_PROC(p, td) { 557123487Sjeff td->td_runtime = 0; 558123487Sjeff } 559123487Sjeff } 560123487Sjeff sx_sunlock(&allproc_lock); 561123487Sjeff PCPU_SET(switchtime, cpu_ticks()); 562123433Sjeff PCPU_SET(switchticks, ticks); 563123487Sjeff 564123433Sjeff /* 565122744Sjeff * Give the ``random'' number generator a thump. 566123433Sjeff */ 567122744Sjeff nanotime(&ts); 568123487Sjeff srandom(ts.tv_sec ^ ts.tv_nsec); 569116069Sjeff} 570116069SjeffSYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); 571116069Sjeff 572123433Sjeffstatic void 573116069Sjeffrandom_init(void *dummy __unused) 574123487Sjeff{ 575116069Sjeff 576116069Sjeff /* 577116069Sjeff * After CPU has been started we have some randomness on most 578121790Sjeff * platforms via get_cyclecount(). For platforms that don't 579116069Sjeff * we will reseed random(9) in proc0_post() as well. 580116069Sjeff */ 581123433Sjeff srandom(get_cyclecount()); 582123433Sjeff} 583116069SjeffSYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL); 584116069Sjeff 585123433Sjeff/* 586123433Sjeff *************************************************************************** 587123433Sjeff **** 588123433Sjeff **** The following SYSINIT's and glue code should be moved to the 589123433Sjeff **** respective files on a per subsystem basis. 590123433Sjeff **** 591123433Sjeff *************************************************************************** 592123433Sjeff */ 593123433Sjeff 594123433Sjeff 595123433Sjeff/* 596123433Sjeff *************************************************************************** 597123433Sjeff **** 598123433Sjeff **** The following code probably belongs in another file, like 599123433Sjeff **** kern/init_init.c. 600123433Sjeff **** 601123433Sjeff *************************************************************************** 602123433Sjeff */ 603123433Sjeff 604123433Sjeff/* 605116069Sjeff * List of paths to try when searching for "init". 606123433Sjeff */ 607123433Sjeffstatic char init_path[MAXPATHLEN] = 608121923Sjeff#ifdef INIT_PATH 609116069Sjeff __XSTRING(INIT_PATH); 610110267Sjeff#else 611123433Sjeff "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init:/stand/sysinstall"; 612123433Sjeff#endif 613121790SjeffSYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, 614123433Sjeff "Path used to search the init process"); 615123433Sjeff 616123433Sjeff/* 617123433Sjeff * Shutdown timeout of init(8). 618123433Sjeff * Unused within kernel, but used to control init(8), hence do not remove. 619123433Sjeff */ 620123433Sjeff#ifndef INIT_SHUTDOWN_TIMEOUT 621123433Sjeff#define INIT_SHUTDOWN_TIMEOUT 120 622123433Sjeff#endif 623123433Sjeffstatic int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; 624123433SjeffSYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, 625123433Sjeff CTLFLAG_RW, &init_shutdown_timeout, 0, ""); 626123433Sjeff 627123433Sjeff/* 628123433Sjeff * Start the initial user process; try exec'ing each pathname in init_path. 629123433Sjeff * The program is invoked with one argument containing the boot flags. 630123433Sjeff */ 631123433Sjeffstatic void 632123433Sjeffstart_init(void *dummy) 633123433Sjeff{ 634123433Sjeff vm_offset_t addr; 635123433Sjeff struct execve_args args; 636123433Sjeff int options, error; 637123433Sjeff char *var, *path, *next, *s; 638123433Sjeff char *ucp, **uap, *arg0, *arg1; 639123433Sjeff struct thread *td; 640123433Sjeff struct proc *p; 641123433Sjeff 642123433Sjeff mtx_lock(&Giant); 643123433Sjeff 644123433Sjeff GIANT_REQUIRED; 645123433Sjeff 646123433Sjeff td = curthread; 647123433Sjeff p = td->td_proc; 648121790Sjeff 649121790Sjeff vfs_mountroot(); 650121790Sjeff 651121790Sjeff /* 652121790Sjeff * Need just enough stack to hold the faked-up "execve()" arguments. 653121790Sjeff */ 654121790Sjeff addr = p->p_sysent->sv_usrstack - PAGE_SIZE; 655121790Sjeff if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 656121790Sjeff FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) 657122848Sjeff panic("init: couldn't allocate argument space"); 658121790Sjeff p->p_vmspace->vm_maxsaddr = (caddr_t)addr; 659121790Sjeff p->p_vmspace->vm_ssize = 1; 660121790Sjeff 661121790Sjeff if ((var = getenv("init_path")) != NULL) { 662121790Sjeff strlcpy(init_path, var, sizeof(init_path)); 663121790Sjeff freeenv(var); 664121790Sjeff } 665121790Sjeff 666121790Sjeff for (path = init_path; *path != '\0'; path = next) { 667121790Sjeff while (*path == ':') 668121790Sjeff path++; 669121790Sjeff if (*path == '\0') 670121790Sjeff break; 671121790Sjeff for (next = path; *next != '\0' && *next != ':'; next++) 672121790Sjeff /* nothing */ ; 673123529Sjeff if (bootverbose) 674121790Sjeff printf("start_init: trying %.*s\n", (int)(next - path), 675121790Sjeff path); 676121790Sjeff 677121790Sjeff /* 678121790Sjeff * Move out the boot flag argument. 679121790Sjeff */ 680121790Sjeff options = 0; 681121790Sjeff ucp = (char *)p->p_sysent->sv_usrstack; 682122848Sjeff (void)subyte(--ucp, 0); /* trailing zero */ 683121790Sjeff if (boothowto & RB_SINGLE) { 684121790Sjeff (void)subyte(--ucp, 's'); 685121790Sjeff options = 1; 686121790Sjeff } 687121790Sjeff#ifdef notyet 688121790Sjeff if (boothowto & RB_FASTBOOT) { 689121790Sjeff (void)subyte(--ucp, 'f'); 690121790Sjeff options = 1; 691121790Sjeff } 692121790Sjeff#endif 693121790Sjeff 694121790Sjeff#ifdef BOOTCDROM 695121790Sjeff (void)subyte(--ucp, 'C'); 696121790Sjeff options = 1; 697121790Sjeff#endif 698121790Sjeff 699121790Sjeff if (options == 0) 700121790Sjeff (void)subyte(--ucp, '-'); 701121790Sjeff (void)subyte(--ucp, '-'); /* leading hyphen */ 702121790Sjeff arg1 = ucp; 703121790Sjeff 704121790Sjeff /* 705121790Sjeff * Move out the file name (also arg 0). 706121790Sjeff */ 707121790Sjeff (void)subyte(--ucp, 0); 708123231Speter for (s = next - 1; s >= path; s--) 709121790Sjeff (void)subyte(--ucp, *s); 710121790Sjeff arg0 = ucp; 711121790Sjeff 712121896Sjeff /* 713121896Sjeff * Move out the arg pointers. 714121790Sjeff */ 715121790Sjeff uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1)); 716121790Sjeff (void)suword((caddr_t)--uap, (long)0); /* terminator */ 717121790Sjeff (void)suword((caddr_t)--uap, (long)(intptr_t)arg1); 718121790Sjeff (void)suword((caddr_t)--uap, (long)(intptr_t)arg0); 719121790Sjeff 720121790Sjeff /* 721121790Sjeff * Point at the arguments. 722123433Sjeff */ 723121790Sjeff args.fname = arg0; 724121790Sjeff args.argv = uap; 725121790Sjeff args.envv = NULL; 726123433Sjeff 727123433Sjeff /* 728123433Sjeff * Now try to exec the program. If can't for any reason 729123433Sjeff * other than it doesn't exist, complain. 730123433Sjeff * 731123433Sjeff * Otherwise, return via fork_trampoline() all the way 732121790Sjeff * to user mode as init! 733121790Sjeff */ 734123433Sjeff if ((error = execve(td, &args)) == 0) { 735123433Sjeff mtx_unlock(&Giant); 736123433Sjeff return; 737121790Sjeff } 738123433Sjeff if (error != ENOENT) 739123433Sjeff printf("exec %.*s: error %d\n", (int)(next - path), 740123433Sjeff path, error); 741123433Sjeff } 742123433Sjeff printf("init: not found in path %s\n", init_path); 743123433Sjeff panic("no init"); 744123433Sjeff} 745123685Sjeff 746123685Sjeff/* 747123433Sjeff * Like kproc_create(), but runs in it's own address space. 748123433Sjeff * We do this early to reserve pid 1. 749123433Sjeff * 750123433Sjeff * Note special case - do not make it runnable yet. Other work 751123685Sjeff * in progress will change this more. 752123685Sjeff */ 753123685Sjeffstatic void 754123685Sjeffcreate_init(const void *udata __unused) 755123685Sjeff{ 756123685Sjeff struct ucred *newcred, *oldcred; 757123694Sjeff int error; 758123433Sjeff 759123433Sjeff error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc); 760123433Sjeff if (error) 761123433Sjeff panic("cannot fork init: %d\n", error); 762123433Sjeff KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); 763123433Sjeff /* divorce init's credentials from the kernel's */ 764123433Sjeff newcred = crget(); 765123433Sjeff PROC_LOCK(initproc); 766123433Sjeff initproc->p_flag |= P_SYSTEM | P_INMEM; 767123433Sjeff oldcred = initproc->p_ucred; 768123433Sjeff crcopy(newcred, oldcred); 769123433Sjeff#ifdef MAC 770123433Sjeff mac_cred_create_init(newcred); 771123433Sjeff#endif 772123433Sjeff#ifdef AUDIT 773123433Sjeff audit_cred_proc1(newcred); 774123433Sjeff#endif 775123433Sjeff initproc->p_ucred = newcred; 776123433Sjeff PROC_UNLOCK(initproc); 777123433Sjeff crfree(oldcred); 778123433Sjeff cred_update_thread(FIRST_THREAD_IN_PROC(initproc)); 779123433Sjeff cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); 780123433Sjeff} 781123433SjeffSYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); 782123433Sjeff 783123433Sjeff/* 784123433Sjeff * Make it runnable now. 785123433Sjeff */ 786123433Sjeffstatic void 787121790Sjeffkick_init(const void *udata __unused) 788121790Sjeff{ 789117326Sjeff struct thread *td; 790121790Sjeff 791117326Sjeff td = FIRST_THREAD_IN_PROC(initproc); 792117326Sjeff thread_lock(td); 793121790Sjeff TD_SET_CAN_RUN(td); 794121790Sjeff sched_add(td, SRQ_BORING); 795110267Sjeff thread_unlock(td); 796110267Sjeff} 797110267SjeffSYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL); 798110267Sjeff