1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved  	*/
28
29/*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39/*
40 * init(1M) is the general process spawning program.  Its primary job is to
41 * start and restart svc.startd for smf(5).  For backwards-compatibility it also
42 * spawns and respawns processes according to /etc/inittab and the current
43 * run-level.  It reads /etc/default/inittab for general configuration.
44 *
45 * To change run-levels the system administrator runs init from the command
46 * line with a level name.  init signals svc.startd via libscf and directs the
47 * zone's init (pid 1 in the global zone) what to do by sending it a signal;
 * these signal numbers are commonly referred to in the code as 'states'.  Valid
49 * run-levels are [sS0123456].  Additionally, init can be given directives
50 * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
51 *
52 * When init processes inittab entries, it finds processes that are to be
53 * spawned at various run-levels.  inittab contains the set of the levels for
54 * which each inittab entry is valid.
55 *
56 * State File and Restartability
57 *   Premature exit by init(1M) is handled as a special case by the kernel:
58 *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
59 *   1 in the global zone.)  To track the processes it has previously spawned,
60 *   as well as other mutable state, init(1M) regularly updates a state file
61 *   such that its subsequent invocations have knowledge of its various
62 *   dependent processes and duties.
63 *
64 * Process Contracts
65 *   We start svc.startd(1M) in a contract and transfer inherited contracts when
66 *   restarting it.  Everything else is started using the legacy contract
67 *   template, and the created contracts are abandoned when they become empty.
68 *
69 * utmpx Entry Handling
70 *   Because init(1M) no longer governs the startup process, its knowledge of
71 *   when utmpx becomes writable is indirect.  However, spawned processes
72 *   expect to be constructed with valid utmpx entries.  As a result, attempts
73 *   to write normal entries will be retried until successful.
74 *
75 * Maintenance Mode
76 *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
77 *   which it invokes sulogin(1M) to allow the operator an opportunity to
78 *   repair the system.  Normally, this operation is performed as a
79 *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
80 *   diagnosis to be completed.  In the cases that fork(2) requests themselves
81 *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
82 *   restart init(1M) on exit from the operator session.
83 *
84 *   One scenario where init(1M) enters its maintenance mode is when
85 *   svc.startd(1M) begins to fail rapidly, defined as when the average time
86 *   between recent failures drops below a given threshold.
87 */
88
89#include <sys/contract/process.h>
90#include <sys/ctfs.h>
91#include <sys/stat.h>
92#include <sys/statvfs.h>
93#include <sys/stropts.h>
94#include <sys/systeminfo.h>
95#include <sys/time.h>
96#include <sys/termios.h>
97#include <sys/tty.h>
98#include <sys/types.h>
99#include <sys/utsname.h>
100
101#include <bsm/adt_event.h>
102#include <bsm/libbsm.h>
103#include <security/pam_appl.h>
104
105#include <assert.h>
106#include <ctype.h>
107#include <dirent.h>
108#include <errno.h>
109#include <fcntl.h>
110#include <libcontract.h>
111#include <libcontract_priv.h>
112#include <libintl.h>
113#include <libscf.h>
114#include <libscf_priv.h>
115#include <poll.h>
116#include <procfs.h>
117#include <signal.h>
118#include <stdarg.h>
119#include <stdio.h>
120#include <stdio_ext.h>
121#include <stdlib.h>
122#include <string.h>
123#include <strings.h>
124#include <syslog.h>
125#include <time.h>
126#include <ulimit.h>
127#include <unistd.h>
128#include <utmpx.h>
129#include <wait.h>
130#include <zone.h>
131#include <ucontext.h>
132
133#undef	sleep
134
/* ioctl(2) applied to the file descriptor underlying stdio stream p. */
#define	fioctl(p, sptr, cmd)	ioctl(fileno(p), sptr, cmd)
/* Smaller of a and b.  Note that each argument may be evaluated twice. */
#define	min(a, b)		(((a) < (b)) ? (a) : (b))

#define	TRUE	1
#define	FALSE	0
/* Parenthesized so the negative constant expands safely in any expression. */
#define	FAILURE	(-1)
141
142#define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
143
144/*
145 * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
146 *		nothing else requires this "init" wakeup.
147 */
148#define	SLEEPTIME	(5 * 60)
149
150/*
151 * MAXCMDL	The maximum length of a command string in inittab.
152 */
153#define	MAXCMDL	512
154
155/*
 * EXEC		The length of the prefix string added to all commands
157 *		found in inittab.
158 */
159#define	EXEC	(sizeof ("exec ") - 1)
160
161/*
162 * TWARN	The amount of time between warning signal, SIGTERM,
163 *		and the fatal kill signal, SIGKILL.
164 */
165#define	TWARN	5
166
/*
 * id_eq(x, y)	Compare the first four characters of two inittab ids.
 *		Evaluates to 1 (TRUE) when all four match and to 0 (FALSE)
 *		otherwise.  Both arguments are parenthesized so that pointer
 *		expressions such as "p + 1" may be passed; each argument is
 *		evaluated more than once, so it must be free of side effects.
 */
#define	id_eq(x, y)	((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3])
169
170/*
171 * The kernel's default umask is 022 these days; since some processes inherit
172 * their umask from init, init will set it from CMASK in /etc/default/init.
 * init gets the default umask from the kernel.  It sets the umask to 022
 * whenever it wants to create a file and reverts to CMASK afterwards.
175 */
176
177static int cmask;
178
179/*
180 * The following definitions, concluding with the 'lvls' array, provide a
181 * common mapping between level-name (like 'S'), signal number (state),
182 * run-level mask, and specific properties associated with a run-level.
183 * This array should be accessed using the routines lvlname_to_state(),
184 * lvlname_to_mask(), state_to_mask(), and state_to_flags().
185 */
186
187/*
188 * Correspondence of signals to init actions.
189 */
190#define	LVLQ		SIGHUP
191#define	LVL0		SIGINT
192#define	LVL1		SIGQUIT
193#define	LVL2		SIGILL
194#define	LVL3		SIGTRAP
195#define	LVL4		SIGIOT
196#define	LVL5		SIGEMT
197#define	LVL6		SIGFPE
198#define	SINGLE_USER	SIGBUS
199#define	LVLa		SIGSEGV
200#define	LVLb		SIGSYS
201#define	LVLc		SIGPIPE
202
/*
 * One bit per level.  A set of levels is represented by or-ing these bits
 * together; the result is used to determine which levels are legal.
 */
#define	MASK0	(1 << 0)
#define	MASK1	(1 << 1)
#define	MASK2	(1 << 2)
#define	MASK3	(1 << 3)
#define	MASK4	(1 << 4)
#define	MASK5	(1 << 5)
#define	MASK6	(1 << 6)
#define	MASKSU	(1 << 7)
#define	MASKa	(1 << 8)
#define	MASKb	(1 << 9)
#define	MASKc	(1 << 10)

/* All numbered run-levels, and all of the a/b/c demand levels. */
#define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
#define	MASK_abc (MASKa | MASKb | MASKc)
220
221/*
222 * Flags to indicate properties of various states.
223 */
224#define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
225
226typedef struct lvl {
227	int	lvl_state;
228	int	lvl_mask;
229	char	lvl_name;
230	int	lvl_flags;
231} lvl_t;
232
233static lvl_t lvls[] = {
234	{ LVLQ,		0,	'Q', 0					},
235	{ LVLQ,		0,	'q', 0					},
236	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
237	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL			},
238	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
239	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
240	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
241	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL			},
242	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL			},
243	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL			},
244	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL			},
245	{ LVLa,		MASKa,	'a', 0					},
246	{ LVLb,		MASKb,	'b', 0					},
247	{ LVLc,		MASKc,	'c', 0					}
248};
249
250#define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
251
252/*
253 * Legal action field values.
254 */
255#define	OFF		0	/* Kill process if on, else ignore */
256#define	RESPAWN		1	/* Continuously restart process when it dies */
257#define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
258#define	ONCE		2	/* Start process, do not respawn when dead */
259#define	WAIT		3	/* Perform once and wait to complete */
260#define	BOOT		4	/* Start at boot time only */
261#define	BOOTWAIT	5	/* Start at boot time and wait to complete */
262#define	POWERFAIL	6	/* Start on powerfail */
263#define	POWERWAIT	7	/* Start and wait for complete on powerfail */
264#define	INITDEFAULT	8	/* Default level "init" should start at */
265#define	SYSINIT		9	/* Actions performed before init speaks */
266
267#define	M_OFF		0001
268#define	M_RESPAWN	0002
269#define	M_ONDEMAND	M_RESPAWN
270#define	M_ONCE		0004
271#define	M_WAIT		0010
272#define	M_BOOT		0020
273#define	M_BOOTWAIT	0040
274#define	M_PF		0100
275#define	M_PWAIT		0200
276#define	M_INITDEFAULT	0400
277#define	M_SYSINIT	01000
278
279/* States for the inittab parser in getcmd(). */
280#define	ID	1
281#define	LEVELS	2
282#define	ACTION	3
283#define	COMMAND	4
284#define	COMMENT	5
285
286/*
287 * inittab entry id constants
288 */
289#define	INITTAB_ENTRY_ID_SIZE 4
290#define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
291						/* changes, this should */
292						/* change accordingly */
293
294/*
295 * Init can be in any of three main states, "normal" mode where it is
296 * processing entries for the lines file in a normal fashion, "boot" mode,
297 * where it is only interested in the boot actions, and "powerfail" mode,
298 * where it is only interested in powerfail related actions. The following
299 * masks declare the legal actions for each mode.
300 */
301#define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
302#define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
303#define	PF_MODES	(M_PF | M_PWAIT)
304
305struct PROC_TABLE {
306	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
307						/* process */
308	pid_t	p_pid;		/* Process id */
309	short	p_count;	/* How many respawns of this command in */
310				/*   the current series */
311	long	p_time;		/* Start time for a series of respawns */
312	short	p_flags;
313	short	p_exit;		/* Exit status of a process which died */
314};
315
316/*
317 * Flags for the "p_flags" word of a PROC_TABLE entry:
318 *
319 *	OCCUPIED	This slot in init's proc table is in use.
320 *
321 *	LIVING		Process is alive.
322 *
323 *	NOCLEANUP	efork() is not allowed to cleanup this entry even
324 *			if process is dead.
325 *
326 *	NAMED		This process has a name, i.e. came from inittab.
327 *
328 *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
329 *			formed this way are respawnable and immune to level
330 *			changes as long as their entry exists in inittab.
331 *
332 *	TOUCHED		Flag used by remv() to determine whether it has looked
333 *			at an entry while checking for processes to be killed.
334 *
335 *	WARNED		Flag used by remv() to mark processes that have been
336 *			sent the SIGTERM signal.  If they don't die in 5
337 *			seconds, they are sent the SIGKILL signal.
338 *
339 *	KILLED		Flag used by remv() to mark procs that have been sent
340 *			the SIGTERM and SIGKILL signals.
341 *
342 *	PF_MASK		Bitwise or of legal flags, for sanity checking.
343 */
344#define	OCCUPIED	01
345#define	LIVING		02
346#define	NOCLEANUP	04
347#define	NAMED		010
348#define	DEMANDREQUEST	020
349#define	TOUCHED		040
350#define	WARNED		0100
351#define	KILLED		0200
352#define	PF_MASK		0377
353
354/*
355 * Respawn limits for processes that are to be respawned:
356 *
357 *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
358 *			respawn a process SPAWN_LIMIT times before it gets mad.
359 *
360 *	SPAWN_LIMIT	The number of respawns "init" will attempt in
361 *			SPAWN_INTERVAL seconds before it generates an
362 *			error message and inhibits further tries for
363 *			INHIBIT seconds.
364 *
365 *	INHIBIT		The number of seconds "init" ignores an entry it had
366 *			trouble spawning unless a "telinit Q" is received.
367 */
368
369#define	SPAWN_INTERVAL	(2*60)
370#define	SPAWN_LIMIT	10
371#define	INHIBIT		(5*60)
372
373/*
374 * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
375 */
376#define	ID_MAX_STR_LEN	10
377
378#define	NULLPROC	((struct PROC_TABLE *)(0))
379#define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
380
381struct CMD_LINE {
382	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
383						/* process to be affected by */
384						/* action */
385	short c_levels;	/* Mask of legal levels for process */
386	short c_action;	/* Mask for type of action required */
387	char *c_command; /* Pointer to init command */
388};
389
390struct	pidrec {
391	int	pd_type;	/* Command type */
392	pid_t	pd_pid;		/* pid to add or remove */
393};
394
395/*
396 * pd_type's
397 */
398#define	ADDPID	1
399#define	REMPID	2
400
401static struct	pidlist {
402	pid_t	pl_pid;		/* pid to watch for */
403	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
404	short	pl_exit;	/* Exit status of proc */
405	struct	pidlist	*pl_next; /* Next in list */
406} *Plhead, *Plfree;
407
408/*
409 * The following structure contains a set of modes for /dev/syscon
410 * and should match the default contents of /etc/ioctl.syscon.  It should also
411 * be kept in-sync with base_termios in uts/common/io/ttcompat.c.
412 */
413static struct termios	dflt_termios = {
414	BRKINT|ICRNL|IXON|IMAXBEL,			/* iflag */
415	OPOST|ONLCR|TAB3,				/* oflag */
416	CS8|CREAD|B9600,				/* cflag */
417	ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
418	CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
419	0, 0, 0, 0, 0, 0, 0, 0,
420	0, 0, 0
421};
422
423static struct termios	stored_syscon_termios;
424static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
425
426static union WAKEUP {
427	struct WAKEFLAGS {
428		unsigned w_usersignal : 1;	/* User sent signal to "init" */
429		unsigned w_childdeath : 1;	/* An "init" child died */
430		unsigned w_powerhit : 1;	/* OS experienced powerfail */
431	}	w_flags;
432	int w_mask;
433} wakeup;
434
435
/*
 * Mutable state of init, reached through the global g_state pointer by way
 * of the accessor macros that follow the structure.
 * NOTE(review): presumably this is what gets saved to and restored from the
 * state file described at the top of this file -- confirm in st_init() and
 * st_write().
 */
struct init_state {
	int			ist_runlevel;	/* current state; see cur_state */
	int			ist_num_proc;	/* entries in ist_proc_table */
	int			ist_utmpx_ok;	/* see utmpx_ok; presumably set */
						/* once utmpx is writable -- */
						/* TODO confirm */
	/*
	 * Process table.  Declared with a single element, but the code walks
	 * it from proc_table up to proc_table + num_proc.
	 * NOTE(review): presumably the structure is allocated with room for
	 * num_proc entries (see g_state_sz) -- confirm.
	 */
	struct PROC_TABLE	ist_proc_table[1];
};

/* Shorthand for the fields of *g_state; all of them are usable as lvalues. */
#define	cur_state	(g_state->ist_runlevel)
#define	num_proc	(g_state->ist_num_proc)
#define	proc_table	(g_state->ist_proc_table)
#define	utmpx_ok	(g_state->ist_utmpx_ok)
447
448/* Contract cookies. */
449#define	ORDINARY_COOKIE		0
450#define	STARTD_COOKIE		1
451
452
/*
 * bad_error(func, err)
 *   Report that the function named by the string func failed with the
 *   unexpected error number err, then abort().  When NDEBUG is defined the
 *   message is omitted and the arguments are not evaluated.
 *
 *   The debug form is wrapped in do { } while (0) so that an invocation
 *   followed by a semicolon behaves as a single statement, including in an
 *   unbraced if/else.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (/* CONSTCOND */ 0)
#else
#define	bad_error(func, err)	abort()
#endif
462
463
464/*
465 * Useful file and device names.
466 */
467static char *CONSOLE	  = "/dev/console";	/* Real system console */
468static char *INITPIPE_DIR = "/var/run";
469static char *INITPIPE	  = "/var/run/initpipe";
470
471#define	INIT_STATE_DIR "/etc/svc/volatile"
472static const char * const init_state_file = INIT_STATE_DIR "/init.state";
473static const char * const init_next_state_file =
474	INIT_STATE_DIR "/init-next.state";
475
476static const int init_num_proc = 20;	/* Initial size of process table. */
477
478static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
479static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
480static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
481static char *SYSTTY	 = "/dev/systty";	/* System Console */
482static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
483static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
484static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
485static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
486static char *SH	= "/sbin/sh";		/* Standard shell */
487
488/*
489 * Default Path.  /sbin is included in path only during sysinit phase
490 */
491#define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
492#define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
493
494static int	prior_state;
495static int	prev_state;	/* State "init" was in last time it woke */
496static int	new_state;	/* State user wants "init" to go to. */
497static int	lvlq_received;	/* Explicit request to examine state */
498static int	op_modes = BOOT_MODES; /* Current state of "init" */
499static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
500				/*   childeath() and cleared in cleanaux() */
501static int	Pfd = -1;	/* fd to receive pids thru */
502static unsigned int	spawncnt, pausecnt;
503static int	rsflag;		/* Set if a respawn has taken place */
504static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
505				/* routine each time an alarm interrupt */
506				/* takes place. */
507static int	sflg = 0;	/* Set if we were booted -s to single user */
508static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
509static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
510static pid_t	init_pid;	/* PID of "one true" init for current zone */
511
512static struct init_state *g_state = NULL;
513static size_t	g_state_sz;
514static int	booting = 1;	/* Set while we're booting. */
515
516/*
517 * Array for default global environment.
518 */
519#define	MAXENVENT	24	/* Max number of default env variables + 1 */
520				/* init can use three itself, so this leaves */
521				/* 20 for the administrator in ENVFILE. */
522static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
523static int	glob_envn;		/* Number of environment strings */
524
525
526static struct pollfd	poll_fds[1];
527static int		poll_nfds = 0;	/* poll_fds is uninitialized */
528
529/*
530 * Contracts constants
531 */
532#define	SVC_INIT_PREFIX "init:/"
533#define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
534#define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
535
536static int	legacy_tmpl = -1;	/* fd for legacy contract template */
537static int	startd_tmpl = -1;	/* fd for svc.startd's template */
538static char	startd_svc_aux[SVC_AUX_SIZE];
539
540static char	startd_cline[256] = "";	/* svc.startd's command line */
541static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
542static char	*smf_options = NULL;	/* Options to give to startd. */
543static int	smf_debug = 0;		/* Messages for debugging smf(5) */
544static time_t	init_boot_time;		/* Substitute for kernel boot time. */
545
546#define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
547#define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
548
549static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
550static uint_t	startd_failure_index;
551
552
553static char	*prog_name(char *);
554static int	state_to_mask(int);
555static int	lvlname_to_mask(char, int *);
556static void	lscf_set_runlevel(char);
557static int	state_to_flags(int);
558static char	state_to_name(int);
559static int	lvlname_to_state(char);
560static int	getcmd(struct CMD_LINE *, char *);
561static int	realcon();
562static int	spawn_processes();
563static int	get_ioctl_syscon();
564static int	account(short, struct PROC_TABLE *, char *);
565static void	alarmclk();
566static void	childeath(int);
567static void	cleanaux();
568static void	clearent(pid_t, short);
569static void	console(boolean_t, char *, ...);
570static void	init_signals(void);
571static void	setup_pipe();
572static void	killproc(pid_t);
573static void	init_env();
574static void	boot_init();
575static void	powerfail();
576static void	remv();
577static void	write_ioctl_syscon();
578static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
579static void	setimer(int);
580static void	siglvl(int, siginfo_t *, ucontext_t *);
581static void	sigpoll(int);
582static void	enter_maintenance(void);
583static void	timer(int);
584static void	userinit(int, char **);
585static void	notify_pam_dead(struct utmpx *);
586static long	waitproc(struct PROC_TABLE *);
587static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
588static struct PROC_TABLE *findpslot(struct CMD_LINE *);
589static void	increase_proc_table_size();
590static void	st_init();
591static void	st_write();
592static void	contracts_init();
593static void	contract_event(struct pollfd *);
594static int	startd_run(const char *, int, ctid_t);
595static void	startd_record_failure();
596static int	startd_failure_rate_critical();
597static char	*audit_boot_msg();
598static int	audit_put_record(int, int, char *);
599static void	update_boot_archive(int new_state);
600
601int
602main(int argc, char *argv[])
603{
604	int	chg_lvl_flag = FALSE, print_banner = FALSE;
605	int	may_need_audit = 1;
606	int	c;
607	char	*msg;
608
609	/* Get a timestamp for use as boot time, if needed. */
610	(void) time(&init_boot_time);
611
612	/* Get the default umask */
613	cmask = umask(022);
614	(void) umask(cmask);
615
616	/* Parse the arguments to init. Check for single user */
617	opterr = 0;
618	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
619		switch (c) {
620		case 'b':
621			rflg = 0;
622			bflg = 1;
623			if (!sflg)
624				sflg++;
625			break;
626		case 'r':
627			bflg = 0;
628			rflg++;
629			break;
630		case 's':
631			if (!bflg)
632				sflg++;
633			break;
634		case 'm':
635			smf_options = optarg;
636			smf_debug = (strstr(smf_options, "debug") != NULL);
637			break;
638		}
639	}
640
641	/*
642	 * Determine if we are the main init, or a user invoked init, whose job
643	 * it is to inform init to change levels or perform some other action.
644	 */
645	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
646	    sizeof (init_pid)) != sizeof (init_pid)) {
647		(void) fprintf(stderr, "could not get pid for init\n");
648		return (1);
649	}
650
651	/*
652	 * If this PID is not the same as the "true" init for the zone, then we
653	 * must be in 'user' mode.
654	 */
655	if (getpid() != init_pid) {
656		userinit(argc, argv);
657	}
658
659	if (getzoneid() != GLOBAL_ZONEID) {
660		print_banner = TRUE;
661	}
662
663	/*
664	 * Initialize state (and set "booting").
665	 */
666	st_init();
667
668	if (booting && print_banner) {
669		struct utsname un;
670		char buf[BUFSIZ], *isa;
671		long ret;
672		int bits = 32;
673
674		/*
675		 * We want to print the boot banner as soon as
676		 * possible.  In the global zone, the kernel does it,
677		 * but we do not have that luxury in non-global zones,
678		 * so we will print it here.
679		 */
680		(void) uname(&un);
681		ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
682		if (ret != -1L && ret <= sizeof (buf)) {
683			for (isa = strtok(buf, " "); isa;
684			    isa = strtok(NULL, " ")) {
685				if (strcmp(isa, "sparcv9") == 0 ||
686				    strcmp(isa, "amd64") == 0) {
687					bits = 64;
688					break;
689				}
690			}
691		}
692
693		console(B_FALSE,
694		    "\n\n%s Release %s Version %s %d-bit\r\n",
695		    un.sysname, un.release, un.version, bits);
696		console(B_FALSE,
697		    "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
698		    " All rights reserved.\r\n");
699	}
700
701	/*
702	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
703	 * so that it can be brought up in the state it was in when the
704	 * system went down; or set to defaults if ioctl.syscon isn't
705	 * valid.
706	 *
707	 * This needs to be done even if we're restarting so reset_modes()
708	 * will work in case we need to go down to single user mode.
709	 */
710	write_ioctl = get_ioctl_syscon();
711
712	/*
713	 * Set up all signals to be caught or ignored as appropriate.
714	 */
715	init_signals();
716
717	/* Load glob_envp from ENVFILE. */
718	init_env();
719
720	contracts_init();
721
722	if (!booting) {
723		/* cur_state should have been read in. */
724
725		op_modes = NORMAL_MODES;
726
727		/* Rewrite the ioctl file if it was bad. */
728		if (write_ioctl)
729			write_ioctl_syscon();
730	} else {
731		/*
732		 * It's fine to boot up with state as zero, because
733		 * startd will later tell us the real state.
734		 */
735		cur_state = 0;
736		op_modes = BOOT_MODES;
737
738		boot_init();
739	}
740
741	prev_state = prior_state = cur_state;
742
743	setup_pipe();
744
745	/*
746	 * Here is the beginning of the main process loop.
747	 */
748	for (;;) {
749		if (lvlq_received) {
750			setup_pipe();
751			lvlq_received = B_FALSE;
752		}
753
754		/*
755		 * Clean up any accounting records for dead "godchildren".
756		 */
757		if (Gchild)
758			cleanaux();
759
760		/*
761		 * If in "normal" mode, check all living processes and initiate
762		 * kill sequence on those that should not be there anymore.
763		 */
764		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
765		    cur_state != LVLb && cur_state != LVLc)
766			remv();
767
768		/*
769		 * If a change in run levels is the reason we awoke, now do
770		 * the accounting to report the change in the utmp file.
771		 * Also report the change on the system console.
772		 */
773		if (chg_lvl_flag) {
774			chg_lvl_flag = FALSE;
775
776			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
777				char rl = state_to_name(cur_state);
778
779				if (rl != -1)
780					lscf_set_runlevel(rl);
781			}
782
783			may_need_audit = 1;
784		}
785
786		/*
787		 * Scan the inittab file and spawn and respawn processes that
788		 * should be alive in the current state. If inittab does not
789		 * exist default to  single user mode.
790		 */
791		if (spawn_processes() == FAILURE) {
792			prior_state = prev_state;
793			cur_state = SINGLE_USER;
794		}
795
796		/* If any respawns occurred, take note. */
797		if (rsflag) {
798			rsflag = 0;
799			spawncnt++;
800		}
801
802		/*
803		 * If a powerfail signal was received during the last
804		 * sequence, set mode to powerfail.  When spawn_processes() is
805		 * entered the first thing it does is to check "powerhit".  If
806		 * it is in PF_MODES then it clears "powerhit" and does
807		 * a powerfail sequence.  If it is not in PF_MODES, then it
808		 * puts itself in PF_MODES and then clears "powerhit".  Should
809		 * "powerhit" get set again while spawn_processes() is working
810		 * on a powerfail sequence, the following code  will see that
811		 * spawn_processes() tries to execute the powerfail sequence
812		 * again.  This guarantees that the powerfail sequence will be
813		 * successfully completed before further processing takes
814		 * place.
815		 */
816		if (wakeup.w_flags.w_powerhit) {
817			op_modes = PF_MODES;
818			/*
819			 * Make sure that cur_state != prev_state so that
820			 * ONCE and WAIT types work.
821			 */
822			prev_state = 0;
823		} else if (op_modes != NORMAL_MODES) {
824			/*
825			 * If spawn_processes() was not just called while in
826			 * normal mode, we set the mode to normal and it will
827			 * be called again to check normal modes.  If we have
828			 * just finished a powerfail sequence with prev_state
829			 * equal to zero, we set prev_state equal to cur_state
830			 * before the next pass through.
831			 */
832			if (op_modes == PF_MODES)
833				prev_state = cur_state;
834			op_modes = NORMAL_MODES;
835		} else if (cur_state == LVLa || cur_state == LVLb ||
836		    cur_state == LVLc) {
837			/*
838			 * If it was a change of levels that awakened us and the
839			 * new level is one of the demand levels then reset
840			 * cur_state to the previous state and do another scan
841			 * to take care of the usual respawn actions.
842			 */
843			cur_state = prior_state;
844			prior_state = prev_state;
845			prev_state = cur_state;
846		} else {
847			prev_state = cur_state;
848
849			if (wakeup.w_mask == 0) {
850				int ret;
851
852				if (may_need_audit && (cur_state == LVL3)) {
853					msg = audit_boot_msg();
854
855					may_need_audit = 0;
856					(void) audit_put_record(ADT_SUCCESS,
857					    ADT_SUCCESS, msg);
858					free(msg);
859				}
860
861				/*
862				 * "init" is finished with all actions for
863				 * the current wakeup.
864				 */
865				ret = poll(poll_fds, poll_nfds,
866				    SLEEPTIME * MILLISEC);
867				pausecnt++;
868				if (ret > 0)
869					contract_event(&poll_fds[0]);
870				else if (ret < 0 && errno != EINTR)
871					console(B_TRUE, "poll() error: %s\n",
872					    strerror(errno));
873			}
874
875			if (wakeup.w_flags.w_usersignal) {
876				/*
877				 * Install the new level.  This could be a real
878				 * change in levels  or a telinit [Q|a|b|c] or
879				 * just a telinit to the same level at which
880				 * we are running.
881				 */
882				if (new_state != cur_state) {
883					if (new_state == LVLa ||
884					    new_state == LVLb ||
885					    new_state == LVLc) {
886						prev_state = prior_state;
887						prior_state = cur_state;
888						cur_state = new_state;
889					} else {
890						prev_state = cur_state;
891						if (cur_state >= 0)
892							prior_state = cur_state;
893						cur_state = new_state;
894						chg_lvl_flag = TRUE;
895					}
896				}
897
898				new_state = 0;
899			}
900
901			if (wakeup.w_flags.w_powerhit)
902				op_modes = PF_MODES;
903
904			/*
905			 * Clear all wakeup reasons.
906			 */
907			wakeup.w_mask = 0;
908		}
909	}
910
911	/*NOTREACHED*/
912}
913
914static void
915update_boot_archive(int new_state)
916{
917	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
918		return;
919
920	if (getzoneid() != GLOBAL_ZONEID)
921		return;
922
923	(void) system("/sbin/bootadm -ea update_all");
924}
925
926/*
927 * void enter_maintenance()
928 *   A simple invocation of sulogin(1M), with no baggage, in the case that we
929 *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
930 *   we wait for it to exit.
931 */
932static void
933enter_maintenance()
934{
935	struct PROC_TABLE	*su_process;
936
937	console(B_FALSE, "Requesting maintenance mode\n"
938	    "(See /lib/svc/share/README for additional information.)\n");
939	(void) sighold(SIGCLD);
940	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
941		(void) pause();
942	(void) sigrelse(SIGCLD);
943	if (su_process == NULLPROC) {
944		int fd;
945
946		(void) fclose(stdin);
947		(void) fclose(stdout);
948		(void) fclose(stderr);
949		closefrom(0);
950
951		fd = open(SYSCON, O_RDWR | O_NOCTTY);
952		if (fd >= 0) {
953			(void) dup2(fd, 1);
954			(void) dup2(fd, 2);
955		} else {
956			/*
957			 * Need to issue an error message somewhere.
958			 */
959			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
960			    getpid(), SYSCON, strerror(errno));
961		}
962
963		/*
964		 * Execute the "su" program.
965		 */
966		(void) execle(SU, SU, "-", (char *)0, glob_envp);
967		console(B_TRUE, "execle of %s failed: %s\n", SU,
968		    strerror(errno));
969		timer(5);
970		exit(1);
971	}
972
973	/*
974	 * If we are the parent, wait around for the child to die
975	 * or for "init" to be signaled to change levels.
976	 */
977	while (waitproc(su_process) == FAILURE) {
978		/*
979		 * All other reasons for waking are ignored when in
980		 * single-user mode.  The only child we are interested
981		 * in is being waited for explicitly by waitproc().
982		 */
983		wakeup.w_mask = 0;
984	}
985}
986
987/*
988 * remv() scans through "proc_table" and performs cleanup.  If
989 * there is a process in the table, which shouldn't be here at
990 * the current run level, then remv() kills the process.
991 */
992static void
993remv()
994{
995	struct PROC_TABLE	*process;
996	struct CMD_LINE		cmd;
997	char			cmd_string[MAXCMDL];
998	int			change_level;
999
1000	change_level = (cur_state != prev_state ? TRUE : FALSE);
1001
1002	/*
1003	 * Clear the TOUCHED flag on all entries so that when we have
1004	 * finished scanning inittab, we will be able to tell if we
1005	 * have any processes for which there is no entry in inittab.
1006	 */
1007	for (process = proc_table;
1008	    (process < proc_table + num_proc); process++) {
1009		process->p_flags &= ~TOUCHED;
1010	}
1011
1012	/*
1013	 * Scan all inittab entries.
1014	 */
1015	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1016		/* Scan for process which goes with this entry in inittab. */
1017		for (process = proc_table;
1018		    (process < proc_table + num_proc); process++) {
1019			if ((process->p_flags & OCCUPIED) == 0 ||
1020			    !id_eq(process->p_id, cmd.c_id))
1021				continue;
1022
1023			/*
1024			 * This slot contains the process we are looking for.
1025			 */
1026
1027			/*
1028			 * Is the cur_state SINGLE_USER or is this process
1029			 * marked as "off" or was this proc started by some
1030			 * mechanism other than LVL{a|b|c} and the current level
1031			 * does not support this process?
1032			 */
1033			if (cur_state == SINGLE_USER ||
1034			    cmd.c_action == M_OFF ||
1035			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1036			    (process->p_flags & DEMANDREQUEST) == 0)) {
1037				if (process->p_flags & LIVING) {
1038					/*
1039					 * Touch this entry so we know we have
1040					 * treated it.  Note that procs which
1041					 * are already dead at this point and
1042					 * should not be restarted are left
1043					 * untouched.  This causes their slot to
1044					 * be freed later after dead accounting
1045					 * is done.
1046					 */
1047					process->p_flags |= TOUCHED;
1048
1049					if ((process->p_flags & KILLED) == 0) {
1050						if (change_level) {
1051							process->p_flags
1052							    |= WARNED;
1053							(void) kill(
1054							    process->p_pid,
1055							    SIGTERM);
1056						} else {
1057							/*
1058							 * Fork a killing proc
1059							 * so "init" can
1060							 * continue without
1061							 * having to pause for
1062							 * TWARN seconds.
1063							 */
1064							killproc(
1065							    process->p_pid);
1066						}
1067						process->p_flags |= KILLED;
1068					}
1069				}
1070			} else {
1071				/*
1072				 * Process can exist at current level.  If it is
1073				 * still alive or a DEMANDREQUEST we touch it so
1074				 * it will be left alone.  Otherwise we leave it
1075				 * untouched so it will be accounted for and
1076				 * cleaned up later in remv().  Dead
1077				 * DEMANDREQUESTs will be accounted but not
1078				 * freed.
1079				 */
1080				if (process->p_flags &
1081				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1082					process->p_flags |= TOUCHED;
1083			}
1084
1085			break;
1086		}
1087	}
1088
1089	st_write();
1090
1091	/*
1092	 * If this was a change of levels call, scan through the
1093	 * process table for processes that were warned to die.  If any
1094	 * are found that haven't left yet, sleep for TWARN seconds and
1095	 * then send final terminations to any that haven't died yet.
1096	 */
1097	if (change_level) {
1098
1099		/*
1100		 * Set the alarm for TWARN seconds on the assumption
1101		 * that there will be some that need to be waited for.
1102		 * This won't harm anything except we are guaranteed to
1103		 * wakeup in TWARN seconds whether we need to or not.
1104		 */
1105		setimer(TWARN);
1106
1107		/*
1108		 * Scan for processes which should be dying.  We hope they
1109		 * will die without having to be sent a SIGKILL signal.
1110		 */
1111		for (process = proc_table;
1112		    (process < proc_table + num_proc); process++) {
1113			/*
1114			 * If this process should die, hasn't yet, and the
1115			 * TWARN time hasn't expired yet, wait for process
1116			 * to die or for timer to expire.
1117			 */
1118			while (time_up == FALSE &&
1119			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1120			    (WARNED|LIVING|OCCUPIED))
1121				(void) pause();
1122
1123			if (time_up == TRUE)
1124				break;
1125		}
1126
1127		/*
1128		 * If we reached the end of the table without the timer
1129		 * expiring, then there are no procs which will have to be
1130		 * sent the SIGKILL signal.  If the timer has expired, then
1131		 * it is necessary to scan the table again and send signals
1132		 * to all processes which aren't going away nicely.
1133		 */
1134		if (time_up == TRUE) {
1135			for (process = proc_table;
1136			    (process < proc_table + num_proc); process++) {
1137				if ((process->p_flags &
1138				    (WARNED|LIVING|OCCUPIED)) ==
1139				    (WARNED|LIVING|OCCUPIED))
1140					(void) kill(process->p_pid, SIGKILL);
1141			}
1142		}
1143		setimer(0);
1144	}
1145
1146	/*
1147	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1148	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1149	 * by the above scanning), and haven't been sent kill signals, and
1150	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1151	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1152	 */
1153	for (process = proc_table;
1154	    (process < proc_table + num_proc); process++) {
1155		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1156		    == (LIVING|NAMED|OCCUPIED)) {
1157			killproc(process->p_pid);
1158			process->p_flags |= KILLED;
1159		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1160		    (NAMED|OCCUPIED)) {
1161			(void) account(DEAD_PROCESS, process, NULL);
1162			/*
1163			 * If this named proc hasn't been TOUCHED, then free the
1164			 * space. It has either died of it's own accord, but
1165			 * isn't respawnable or it was killed because it
1166			 * shouldn't exist at this level.
1167			 */
1168			if ((process->p_flags & TOUCHED) == 0)
1169				process->p_flags = 0;
1170		}
1171	}
1172
1173	st_write();
1174}
1175
1176/*
1177 * Extract the svc.startd command line and whether to restart it from its
1178 * inittab entry.
1179 */
1180/*ARGSUSED*/
1181static void
1182process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1183{
1184	size_t sz;
1185
1186	/* Save the command line. */
1187	if (sflg || rflg) {
1188		/* Also append -r or -s. */
1189		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1190		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1191		if (sflg)
1192			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1193		if (rflg)
1194			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1195	} else {
1196		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1197	}
1198
1199	if (sz >= sizeof (startd_cline)) {
1200		console(B_TRUE,
1201		    "svc.startd command line too long.  Ignoring.\n");
1202		startd_cline[0] = '\0';
1203		return;
1204	}
1205}
1206
1207/*
1208 * spawn_processes() scans inittab for entries which should be run at this
1209 * mode.  Processes which should be running but are not, are started.
1210 */
1211static int
1212spawn_processes()
1213{
1214	struct PROC_TABLE		*pp;
1215	struct CMD_LINE			cmd;
1216	char				cmd_string[MAXCMDL];
1217	short				lvl_mask;
1218	int				status;
1219
1220	/*
1221	 * First check the "powerhit" flag.  If it is set, make sure the modes
1222	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1223	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1224	 * between the test of the powerhit flag and the clearing of it.
1225	 */
1226	if (wakeup.w_flags.w_powerhit) {
1227		wakeup.w_flags.w_powerhit = 0;
1228		op_modes = PF_MODES;
1229	}
1230	lvl_mask = state_to_mask(cur_state);
1231
1232	/*
1233	 * Scan through all the entries in inittab.
1234	 */
1235	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1236		if (id_eq(cmd.c_id, "smf")) {
1237			process_startd_line(&cmd, cmd_string);
1238			continue;
1239		}
1240
1241retry_for_proc_slot:
1242
1243		/*
1244		 * Find out if there is a process slot for this entry already.
1245		 */
1246		if ((pp = findpslot(&cmd)) == NULLPROC) {
1247			/*
1248			 * we've run out of proc table entries
1249			 * increase proc_table.
1250			 */
1251			increase_proc_table_size();
1252
1253			/*
1254			 * Retry now as we have an empty proc slot.
1255			 * In case increase_proc_table_size() fails,
1256			 * we will keep retrying.
1257			 */
1258			goto retry_for_proc_slot;
1259		}
1260
1261		/*
1262		 * If there is an entry, and it is marked as DEMANDREQUEST,
1263		 * one of the levels a, b, or c is in its levels mask, and
1264		 * the action field is ONDEMAND and ONDEMAND is a permissable
1265		 * mode, and the process is dead, then respawn it.
1266		 */
1267		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1268		    (cmd.c_levels & MASK_abc) &&
1269		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1270			spawn(pp, &cmd);
1271			continue;
1272		}
1273
1274		/*
1275		 * If the action is not an action we are interested in,
1276		 * skip the entry.
1277		 */
1278		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1279		    (cmd.c_levels & lvl_mask) == 0)
1280			continue;
1281
1282		/*
1283		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1284		 * ONDEMAND) and the action field is either OFF or the action
1285		 * field is ONCE or WAIT and the current level is the same as
1286		 * the last level, then skip this entry.  ONCE and WAIT only
1287		 * get run when the level changes.
1288		 */
1289		if (op_modes == NORMAL_MODES &&
1290		    (cmd.c_action == M_OFF ||
1291		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
1292		    cur_state == prev_state))
1293			continue;
1294
1295		/*
1296		 * At this point we are interested in performing the action for
1297		 * this entry.  Actions fall into two categories, spinning off
1298		 * a process and not waiting, and spinning off a process and
1299		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1300		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1301		 * to die, for all other actions we do wait.
1302		 */
1303		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1304			spawn(pp, &cmd);
1305
1306		} else {
1307			spawn(pp, &cmd);
1308			while (waitproc(pp) == FAILURE)
1309				;
1310			(void) account(DEAD_PROCESS, pp, NULL);
1311			pp->p_flags = 0;
1312		}
1313	}
1314	return (status);
1315}
1316
1317/*
1318 * spawn() spawns a shell, inserts the information about the process
1319 * process into the proc_table, and does the startup accounting.
1320 */
1321static void
1322spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1323{
1324	int		i;
1325	int		modes, maxfiles;
1326	time_t		now;
1327	struct PROC_TABLE tmproc, *oprocess;
1328
1329	/*
1330	 * The modes to be sent to efork() are 0 unless we are
1331	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1332	 * waiting for the death of the child before continuing.
1333	 */
1334	modes = NAMED;
1335	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1336	    cur_state == LVLb || cur_state == LVLc)
1337		modes |= DEMANDREQUEST;
1338	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1339		modes |= NOCLEANUP;
1340
1341	/*
1342	 * If this is a respawnable process, check the threshold
1343	 * information to avoid excessive respawns.
1344	 */
1345	if (cmd->c_action & M_RESPAWN) {
1346		/*
1347		 * Add NOCLEANUP to all respawnable commands so that the
1348		 * information about the frequency of respawns isn't lost.
1349		 */
1350		modes |= NOCLEANUP;
1351		(void) time(&now);
1352
1353		/*
1354		 * If no time is assigned, then this is the first time
1355		 * this command is being processed in this series.  Assign
1356		 * the current time.
1357		 */
1358		if (process->p_time == 0L)
1359			process->p_time = now;
1360
1361		if (process->p_count++ == SPAWN_LIMIT) {
1362
1363			if ((now - process->p_time) < SPAWN_INTERVAL) {
1364				/*
1365				 * Process is respawning too rapidly.  Print
1366				 * message and refuse to respawn it for now.
1367				 */
1368				console(B_TRUE, "Command is respawning too "
1369				    "rapidly. Check for possible errors.\n"
1370				    "id:%4s \"%s\"\n",
1371				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1372				return;
1373			}
1374			process->p_time = now;
1375			process->p_count = 0;
1376
1377		} else if (process->p_count > SPAWN_LIMIT) {
1378			/*
1379			 * If process has been respawning too rapidly and
1380			 * the inhibit time limit hasn't expired yet, we
1381			 * refuse to respawn.
1382			 */
1383			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1384				return;
1385			process->p_time = now;
1386			process->p_count = 0;
1387		}
1388		rsflag = TRUE;
1389	}
1390
1391	/*
1392	 * Spawn a child process to execute this command.
1393	 */
1394	(void) sighold(SIGCLD);
1395	oprocess = process;
1396	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1397		(void) pause();
1398
1399	if (process == NULLPROC) {
1400
1401		/*
1402		 * We are the child.  We must make sure we get a different
1403		 * file pointer for our references to utmpx.  Otherwise our
1404		 * seeks and reads will compete with those of the parent.
1405		 */
1406		endutxent();
1407
1408		/*
1409		 * Perform the accounting for the beginning of a process.
1410		 * Note that all processes are initially "INIT_PROCESS"es.
1411		 */
1412		tmproc.p_id[0] = cmd->c_id[0];
1413		tmproc.p_id[1] = cmd->c_id[1];
1414		tmproc.p_id[2] = cmd->c_id[2];
1415		tmproc.p_id[3] = cmd->c_id[3];
1416		tmproc.p_pid = getpid();
1417		tmproc.p_exit = 0;
1418		(void) account(INIT_PROCESS, &tmproc,
1419		    prog_name(&cmd->c_command[EXEC]));
1420		maxfiles = ulimit(UL_GDESLIM, 0);
1421		for (i = 0; i < maxfiles; i++)
1422			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1423
1424		/*
1425		 * Now exec a shell with the -c option and the command
1426		 * from inittab.
1427		 */
1428		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1429		    glob_envp);
1430		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1431		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1432
1433		/*
1434		 * Don't come back so quickly that "init" doesn't have a
1435		 * chance to finish putting this child in "proc_table".
1436		 */
1437		timer(20);
1438		exit(1);
1439
1440	}
1441
1442	/*
1443	 * We are the parent.  Insert the necessary
1444	 * information in the proc_table.
1445	 */
1446	process->p_id[0] = cmd->c_id[0];
1447	process->p_id[1] = cmd->c_id[1];
1448	process->p_id[2] = cmd->c_id[2];
1449	process->p_id[3] = cmd->c_id[3];
1450
1451	st_write();
1452
1453	(void) sigrelse(SIGCLD);
1454}
1455
1456/*
1457 * findpslot() finds the old slot in the process table for the
1458 * command with the same id, or it finds an empty slot.
1459 */
1460static struct PROC_TABLE *
1461findpslot(struct CMD_LINE *cmd)
1462{
1463	struct PROC_TABLE	*process;
1464	struct PROC_TABLE	*empty = NULLPROC;
1465
1466	for (process = proc_table;
1467	    (process < proc_table + num_proc); process++) {
1468		if (process->p_flags & OCCUPIED &&
1469		    id_eq(process->p_id, cmd->c_id))
1470			break;
1471
1472		/*
1473		 * If the entry is totally empty and "empty" is still 0,
1474		 * remember where this hole is and make sure the slot is
1475		 * zeroed out.
1476		 */
1477		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1478			empty = process;
1479			process->p_id[0] = '\0';
1480			process->p_id[1] = '\0';
1481			process->p_id[2] = '\0';
1482			process->p_id[3] = '\0';
1483			process->p_pid = 0;
1484			process->p_time = 0L;
1485			process->p_count = 0;
1486			process->p_flags = 0;
1487			process->p_exit = 0;
1488		}
1489	}
1490
1491	/*
1492	 * If there is no entry for this slot, then there should be an
1493	 * empty slot.  If there is no empty slot, then we've run out
1494	 * of proc_table space.  If the latter is true, empty will be
1495	 * NULL and the caller will have to complain.
1496	 */
1497	if (process == (proc_table + num_proc))
1498		process = empty;
1499
1500	return (process);
1501}
1502
1503/*
1504 * getcmd() parses lines from inittab.  Each time it finds a command line
1505 * it will return TRUE as well as fill the passed CMD_LINE structure and
1506 * the shell command string.  When the end of inittab is reached, FALSE
1507 * is returned inittab is automatically opened if it is not currently open
1508 * and is closed when the end of the file is reached.
1509 */
1510static FILE *fp_inittab = NULL;
1511
1512static int
1513getcmd(struct CMD_LINE *cmd, char *shcmd)
1514{
1515	char	*ptr;
1516	int	c, lastc, state;
1517	char 	*ptr1;
1518	int	answer, i, proceed;
1519	struct	stat	sbuf;
1520	static char *actions[] = {
1521		"off", "respawn", "ondemand", "once", "wait", "boot",
1522		"bootwait", "powerfail", "powerwait", "initdefault",
1523		"sysinit",
1524	};
1525	static short act_masks[] = {
1526		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1527		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1528	};
1529	/*
1530	 * Only these actions will be allowed for entries which
1531	 * are specified for single-user mode.
1532	 */
1533	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1534
1535	if (fp_inittab == NULL) {
1536		/*
1537		 * Before attempting to open inittab we stat it to make
1538		 * sure it currently exists and is not empty.  We try
1539		 * several times because someone may have temporarily
1540		 * unlinked or truncated the file.
1541		 */
1542		for (i = 0; i < 3; i++) {
1543			if (stat(INITTAB, &sbuf) == -1) {
1544				if (i == 2) {
1545					console(B_TRUE,
1546					    "Cannot stat %s, errno: %d\n",
1547					    INITTAB, errno);
1548					return (FAILURE);
1549				} else {
1550					timer(3);
1551				}
1552			} else if (sbuf.st_size < 10) {
1553				if (i == 2) {
1554					console(B_TRUE,
1555					    "%s truncated or corrupted\n",
1556					    INITTAB);
1557					return (FAILURE);
1558				} else {
1559					timer(3);
1560				}
1561			} else {
1562				break;
1563			}
1564		}
1565
1566		/*
1567		 * If unable to open inittab, print error message and
1568		 * return FAILURE to caller.
1569		 */
1570		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1571			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1572			    errno);
1573			return (FAILURE);
1574		}
1575	}
1576
1577	/*
1578	 * Keep getting commands from inittab until you find a
1579	 * good one or run out of file.
1580	 */
1581	for (answer = FALSE; answer == FALSE; ) {
1582		/*
1583		 * Zero out the cmd itself before trying next line.
1584		 */
1585		bzero(cmd, sizeof (struct CMD_LINE));
1586
1587		/*
1588		 * Read in lines of inittab, parsing at colons, until a line is
1589		 * read in which doesn't end with a backslash.  Do not start if
1590		 * the first character read is an EOF.  Note that this means
1591		 * that lines which don't end in a newline are still processed,
1592		 * since the "for" will terminate normally once started,
1593		 * regardless of whether line terminates with a newline or EOF.
1594		 */
1595		state = FAILURE;
1596		if ((c = fgetc(fp_inittab)) == EOF) {
1597			answer = FALSE;
1598			(void) fclose(fp_inittab);
1599			fp_inittab = NULL;
1600			break;
1601		}
1602
1603		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1604		    proceed && c != EOF;
1605		    lastc = c, c = fgetc(fp_inittab)) {
1606		    /* If we're not in the FAILURE state and haven't	*/
1607		    /* yet reached the shell command field, process	*/
1608		    /* the line, otherwise just look for a real end	*/
1609		    /* of line.						*/
1610		    if (state != FAILURE && state != COMMAND) {
1611			/*
1612			 * Squeeze out spaces and tabs.
1613			 */
1614			if (c == ' ' || c == '\t')
1615				continue;
1616
1617			/*
1618			 * Ignore characters in a comment, except for the \n.
1619			 */
1620			if (state == COMMENT) {
1621				if (c == '\n') {
1622					lastc = ' ';
1623					break;
1624				} else {
1625					continue;
1626				}
1627			}
1628
1629			/*
1630			 * Detect comments (lines whose first non-whitespace
1631			 * character is '#') by checking that we're at the
1632			 * beginning of a line, have seen a '#', and haven't
1633			 * yet accumulated any characters.
1634			 */
1635			if (state == ID && c == '#' && ptr == shcmd) {
1636				state = COMMENT;
1637				continue;
1638			}
1639
1640			/*
1641			 * If the character is a ':', then check the
1642			 * previous field for correctness and advance
1643			 * to the next field.
1644			 */
1645			if (c == ':') {
1646			    switch (state) {
1647
1648			    case ID :
1649				/*
1650				 * Check to see that there are only
1651				 * 1 to 4 characters for the id.
1652				 */
1653				if ((i = ptr - shcmd) < 1 || i > 4) {
1654					state = FAILURE;
1655				} else {
1656					bcopy(shcmd, &cmd->c_id[0], i);
1657					ptr = shcmd;
1658					state = LEVELS;
1659				}
1660				break;
1661
1662			    case LEVELS :
1663				/*
1664				 * Build a mask for all the levels for
1665				 * which this command will be legal.
1666				 */
1667				for (cmd->c_levels = 0, ptr1 = shcmd;
1668				    ptr1 < ptr; ptr1++) {
1669					int mask;
1670					if (lvlname_to_mask(*ptr1,
1671					    &mask) == -1) {
1672						state = FAILURE;
1673						break;
1674					}
1675					cmd->c_levels |= mask;
1676				}
1677				if (state != FAILURE) {
1678					state = ACTION;
1679					ptr = shcmd;	/* Reset the buffer */
1680				}
1681				break;
1682
1683			    case ACTION :
1684				/*
1685				 * Null terminate the string in shcmd buffer and
1686				 * then try to match against legal actions.  If
1687				 * the field is of length 0, then the default of
1688				 * "RESPAWN" is used if the id is numeric,
1689				 * otherwise the default is "OFF".
1690				 */
1691				if (ptr == shcmd) {
1692					if (isdigit(cmd->c_id[0]) &&
1693					    (cmd->c_id[1] == '\0' ||
1694						isdigit(cmd->c_id[1])) &&
1695					    (cmd->c_id[2] == '\0' ||
1696						isdigit(cmd->c_id[2])) &&
1697					    (cmd->c_id[3] == '\0' ||
1698						isdigit(cmd->c_id[3])))
1699						    cmd->c_action = M_RESPAWN;
1700					else
1701						    cmd->c_action = M_OFF;
1702				} else {
1703				    for (cmd->c_action = 0, i = 0, *ptr = '\0';
1704				    i < sizeof (actions)/sizeof (char *);
1705				    i++) {
1706					if (strcmp(shcmd, actions[i]) == 0) {
1707					    if ((cmd->c_levels & MASKSU) &&
1708						!(act_masks[i] & su_acts))
1709						    cmd->c_action = 0;
1710					    else
1711						cmd->c_action = act_masks[i];
1712					    break;
1713					}
1714				    }
1715				}
1716
1717				/*
1718				 * If the action didn't match any legal action,
1719				 * set state to FAILURE.
1720				 */
1721				if (cmd->c_action == 0) {
1722					state = FAILURE;
1723				} else {
1724					state = COMMAND;
1725					(void) strcpy(shcmd, "exec ");
1726				}
1727				ptr = shcmd + EXEC;
1728				break;
1729			    }
1730			    continue;
1731			}
1732		    }
1733
1734		    /* If the character is a '\n', then this is the end of a */
1735		    /* line.  If the '\n' wasn't preceded by a backslash, */
1736		    /* it is also the end of an inittab command.  If it was */
1737		    /* preceded by a backslash then the next line is a */
1738		    /* continuation.  Note that the continuation '\n' falls */
1739		    /* through and is treated like other characters and is */
1740		    /* stored in the shell command line. */
1741		    if (c == '\n' && lastc != '\\') {
1742				proceed = FALSE;
1743				*ptr = '\0';
1744				break;
1745		    }
1746
1747		    /* For all other characters just stuff them into the */
1748		    /* command as long as there aren't too many of them. */
1749		    /* Make sure there is room for a terminating '\0' also. */
1750		    if (ptr >= shcmd + MAXCMDL - 1)
1751			state = FAILURE;
1752		    else
1753			*ptr++ = (char)c;
1754
1755		    /* If the character we just stored was a quoted	*/
1756		    /* backslash, then change "c" to '\0', so that this	*/
1757		    /* backslash will not cause a subsequent '\n' to appear */
1758		    /* quoted.  In otherwords '\' '\' '\n' is the real end */
1759		    /* of a command, while '\' '\n' is a continuation. */
1760		    if (c == '\\' && lastc == '\\')
1761			c = '\0';
1762		}
1763
1764		/*
1765		 * Make sure all the fields are properly specified
1766		 * for a good command line.
1767		 */
1768		if (state == COMMAND) {
1769			answer = TRUE;
1770			cmd->c_command = shcmd;
1771
1772			/*
1773			 * If no default level was supplied, insert
1774			 * all numerical levels.
1775			 */
1776			if (cmd->c_levels == 0)
1777				cmd->c_levels = MASK_NUMERIC;
1778
1779			/*
1780			 * If no action has been supplied, declare this
1781			 * entry to be OFF.
1782			 */
1783			if (cmd->c_action == 0)
1784				cmd->c_action = M_OFF;
1785
1786			/*
1787			 * If no shell command has been supplied, make sure
1788			 * there is a null string in the command field.
1789			 */
1790			if (ptr == shcmd + EXEC)
1791				*shcmd = '\0';
1792		} else
1793			answer = FALSE;
1794
1795		/*
1796		 * If we have reached the end of inittab, then close it
1797		 * and quit trying to find a good command line.
1798		 */
1799		if (c == EOF) {
1800			(void) fclose(fp_inittab);
1801			fp_inittab = NULL;
1802			break;
1803		}
1804	}
1805	return (answer);
1806}
1807
1808/*
1809 * lvlname_to_state(): convert the character name of a state to its level
1810 * (its corresponding signal number).
1811 */
1812static int
1813lvlname_to_state(char name)
1814{
1815	int i;
1816	for (i = 0; i < LVL_NELEMS; i++) {
1817		if (lvls[i].lvl_name == name)
1818			return (lvls[i].lvl_state);
1819	}
1820	return (-1);
1821}
1822
1823/*
1824 * state_to_name(): convert the level to the character name.
1825 */
1826static char
1827state_to_name(int state)
1828{
1829	int i;
1830	for (i = 0; i < LVL_NELEMS; i++) {
1831		if (lvls[i].lvl_state == state)
1832			return (lvls[i].lvl_name);
1833	}
1834	return (-1);
1835}
1836
1837/*
1838 * state_to_mask(): return the mask corresponding to a signal number
1839 */
1840static int
1841state_to_mask(int state)
1842{
1843	int i;
1844	for (i = 0; i < LVL_NELEMS; i++) {
1845		if (lvls[i].lvl_state == state)
1846			return (lvls[i].lvl_mask);
1847	}
1848	return (0);	/* return 0, since that represents an empty mask */
1849}
1850
1851/*
1852 * lvlname_to_mask(): return the mask corresponding to a levels character name
1853 */
1854static int
1855lvlname_to_mask(char name, int *mask)
1856{
1857	int i;
1858	for (i = 0; i < LVL_NELEMS; i++) {
1859		if (lvls[i].lvl_name == name) {
1860			*mask = lvls[i].lvl_mask;
1861			return (0);
1862		}
1863	}
1864	return (-1);
1865}
1866
1867/*
1868 * state_to_flags(): return the flags corresponding to a runlevel.  These
1869 * indicate properties of that runlevel.
1870 */
1871static int
1872state_to_flags(int state)
1873{
1874	int i;
1875	for (i = 0; i < LVL_NELEMS; i++) {
1876		if (lvls[i].lvl_state == state)
1877			return (lvls[i].lvl_flags);
1878	}
1879	return (0);
1880}
1881
1882/*
1883 * killproc() creates a child which kills the process specified by pid.
1884 */
1885void
1886killproc(pid_t pid)
1887{
1888	struct PROC_TABLE	*process;
1889
1890	(void) sighold(SIGCLD);
1891	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1892		(void) pause();
1893	(void) sigrelse(SIGCLD);
1894
1895	if (process == NULLPROC) {
1896		/*
1897		 * efork() sets all signal handlers to the default, so reset
1898		 * the ALRM handler to make timer() work as expected.
1899		 */
1900		(void) sigset(SIGALRM, alarmclk);
1901
1902		/*
1903		 * We are the child.  Try to terminate the process nicely
1904		 * first using SIGTERM and if it refuses to die in TWARN
1905		 * seconds kill it with SIGKILL.
1906		 */
1907		(void) kill(pid, SIGTERM);
1908		(void) timer(TWARN);
1909		(void) kill(pid, SIGKILL);
1910		(void) exit(0);
1911	}
1912}
1913
1914/*
1915 * Set up the default environment for all procs to be forked from init.
1916 * Read the values from the /etc/default/init file, except for PATH.  If
1917 * there's not enough room in the environment array, the environment
1918 * lines that don't fit are silently discarded.
1919 */
1920void
1921init_env()
1922{
1923	char	line[MAXCMDL];
1924	FILE	*fp;
1925	int	inquotes, length, wslength;
1926	char	*tokp, *cp1, *cp2;
1927
1928	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1929	(void) strcpy(glob_envp[0], DEF_PATH);
1930	glob_envn = 1;
1931
1932	if (rflg) {
1933		glob_envp[1] =
1934		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1935		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1936		++glob_envn;
1937	} else if (bflg == 1) {
1938		glob_envp[1] =
1939		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1940		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1941		++glob_envn;
1942	}
1943
1944	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1945		console(B_TRUE,
1946		    "Cannot open %s. Environment not initialized.\n",
1947		    ENVFILE);
1948	} else {
1949		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1950		    glob_envn < MAXENVENT - 2) {
1951			/*
1952			 * Toss newline
1953			 */
1954			length = strlen(line);
1955			if (line[length - 1] == '\n')
1956				line[length - 1] = '\0';
1957
1958			/*
1959			 * Ignore blank or comment lines.
1960			 */
1961			if (line[0] == '#' || line[0] == '\0' ||
1962			    (wslength = strspn(line, " \t\n")) ==
1963			    strlen(line) ||
1964			    strchr(line, '#') == line + wslength)
1965				continue;
1966
1967			/*
1968			 * First make a pass through the line and change
1969			 * any non-quoted semi-colons to blanks so they
1970			 * will be treated as token separators below.
1971			 */
1972			inquotes = 0;
1973			for (cp1 = line; *cp1 != '\0'; cp1++) {
1974				if (*cp1 == '"') {
1975					if (inquotes == 0)
1976						inquotes = 1;
1977					else
1978						inquotes = 0;
1979				} else if (*cp1 == ';') {
1980					if (inquotes == 0)
1981						*cp1 = ' ';
1982				}
1983			}
1984
1985			/*
1986			 * Tokens within the line are separated by blanks
1987			 *  and tabs.  For each token in the line which
1988			 * contains a '=' we strip out any quotes and then
1989			 * stick the token in the environment array.
1990			 */
1991			if ((tokp = strtok(line, " \t")) == NULL)
1992				continue;
1993			do {
1994				if (strchr(tokp, '=') == NULL)
1995					continue;
1996				length = strlen(tokp);
1997				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
1998					for (cp2 = cp1;
1999					    cp2 < &tokp[length]; cp2++)
2000						*cp2 = *(cp2 + 1);
2001					length--;
2002				}
2003
2004				if (strncmp(tokp, "CMASK=",
2005				    sizeof ("CMASK=") - 1) == 0) {
2006					long t;
2007
2008					/* We know there's an = */
2009					t = strtol(strchr(tokp, '=') + 1, NULL,
2010					    8);
2011
2012					/* Sanity */
2013					if (t <= 077 && t >= 0)
2014						cmask = (int)t;
2015					(void) umask(cmask);
2016					continue;
2017				}
2018				glob_envp[glob_envn] =
2019				    malloc((unsigned)(length + 1));
2020				(void) strcpy(glob_envp[glob_envn], tokp);
2021				if (++glob_envn >= MAXENVENT - 1)
2022					break;
2023			} while ((tokp = strtok(NULL, " \t")) != NULL);
2024		}
2025
2026		/*
2027		 * Append a null pointer to the environment array
2028		 * to mark its end.
2029		 */
2030		glob_envp[glob_envn] = NULL;
2031		(void) fclose(fp);
2032	}
2033}
2034
2035/*
2036 * boot_init(): Do initialization things that should be done at boot.
2037 */
2038void
2039boot_init()
2040{
2041	int i;
2042	struct PROC_TABLE *process, *oprocess;
2043	struct CMD_LINE	cmd;
2044	char	line[MAXCMDL];
2045	char	svc_aux[SVC_AUX_SIZE];
2046	char	init_svc_fmri[SVC_FMRI_SIZE];
2047	char *old_path;
2048	int maxfiles;
2049
2050	/* Use INIT_PATH for sysinit cmds */
2051	old_path = glob_envp[0];
2052	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
2053	(void) strcpy(glob_envp[0], INIT_PATH);
2054
2055	/*
2056	 * Scan inittab(4) and process the special svc.startd entry, initdefault
2057	 * and sysinit entries.
2058	 */
2059	while (getcmd(&cmd, &line[0]) == TRUE) {
2060		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
2061			process_startd_line(&cmd, line);
2062			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
2063			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2064		} else if (cmd.c_action == M_INITDEFAULT) {
2065			/*
2066			 * initdefault is no longer meaningful, as the SMF
2067			 * milestone controls what (legacy) run level we
2068			 * boot to.
2069			 */
2070			console(B_TRUE,
2071			    "Ignoring legacy \"initdefault\" entry.\n");
2072		} else if (cmd.c_action == M_SYSINIT) {
2073			/*
2074			 * Execute the "sysinit" entry and wait for it to
2075			 * complete.  No bookkeeping is performed on these
2076			 * entries because we avoid writing to the file system
2077			 * until after there has been an chance to check it.
2078			 */
2079			if (process = findpslot(&cmd)) {
2080				(void) sighold(SIGCLD);
2081				(void) snprintf(svc_aux, SVC_AUX_SIZE,
2082				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
2083				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
2084				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
2085				    cmd.c_id);
2086				if (legacy_tmpl >= 0) {
2087					(void) ct_pr_tmpl_set_svc_fmri(
2088					    legacy_tmpl, init_svc_fmri);
2089					(void) ct_pr_tmpl_set_svc_aux(
2090					    legacy_tmpl, svc_aux);
2091				}
2092
2093				for (oprocess = process;
2094				    (process = efork(M_OFF, oprocess,
2095				    (NAMED|NOCLEANUP))) == NO_ROOM;
2096				    /* CSTYLED */)
2097					;
2098				(void) sigrelse(SIGCLD);
2099
2100				if (process == NULLPROC) {
2101					maxfiles = ulimit(UL_GDESLIM, 0);
2102
2103					for (i = 0; i < maxfiles; i++)
2104						(void) fcntl(i, F_SETFD,
2105						    FD_CLOEXEC);
2106					(void) execle(SH, "INITSH", "-c",
2107					    cmd.c_command,
2108					    (char *)0, glob_envp);
2109					console(B_TRUE,
2110"Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
2111					    cmd.c_command, errno);
2112					exit(1);
2113				} else while (waitproc(process) == FAILURE);
2114				process->p_flags = 0;
2115				st_write();
2116			}
2117		}
2118	}
2119
2120	/* Restore the path. */
2121	free(glob_envp[0]);
2122	glob_envp[0] = old_path;
2123
2124	/*
2125	 * This will enable st_write() to complain about init_state_file.
2126	 */
2127	booting = 0;
2128
2129	/*
2130	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
2131	 * out a correct version.
2132	 */
2133	if (write_ioctl)
2134		write_ioctl_syscon();
2135
2136	/*
2137	 * Start svc.startd(1M), which does most of the work.
2138	 */
2139	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
2140		/* Start svc.startd. */
2141		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
2142			cur_state = SINGLE_USER;
2143	} else {
2144		console(B_TRUE, "Absent svc.startd entry or bad "
2145		    "contract template.  Not starting svc.startd.\n");
2146		enter_maintenance();
2147	}
2148}
2149
2150/*
2151 * init_signals(): Initialize all signals to either be caught or ignored.
2152 */
2153void
2154init_signals(void)
2155{
2156	struct sigaction act;
2157	int i;
2158
2159	/*
2160	 * Start by ignoring all signals, then selectively re-enable some.
2161	 * The SIG_IGN disposition will only affect asynchronous signals:
2162	 * any signal that we trigger synchronously that doesn't end up
2163	 * being handled by siglvl() will be forcibly delivered by the kernel.
2164	 */
2165	for (i = SIGHUP; i <= SIGRTMAX; i++)
2166		(void) sigset(i, SIG_IGN);
2167
2168	/*
2169	 * Handle all level-changing signals using siglvl() and set sa_mask so
2170	 * that all level-changing signals are blocked while in siglvl().
2171	 */
2172	act.sa_handler = siglvl;
2173	act.sa_flags = SA_SIGINFO;
2174	(void) sigemptyset(&act.sa_mask);
2175
2176	(void) sigaddset(&act.sa_mask, LVLQ);
2177	(void) sigaddset(&act.sa_mask, LVL0);
2178	(void) sigaddset(&act.sa_mask, LVL1);
2179	(void) sigaddset(&act.sa_mask, LVL2);
2180	(void) sigaddset(&act.sa_mask, LVL3);
2181	(void) sigaddset(&act.sa_mask, LVL4);
2182	(void) sigaddset(&act.sa_mask, LVL5);
2183	(void) sigaddset(&act.sa_mask, LVL6);
2184	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2185	(void) sigaddset(&act.sa_mask, LVLa);
2186	(void) sigaddset(&act.sa_mask, LVLb);
2187	(void) sigaddset(&act.sa_mask, LVLc);
2188
2189	(void) sigaction(LVLQ, &act, NULL);
2190	(void) sigaction(LVL0, &act, NULL);
2191	(void) sigaction(LVL1, &act, NULL);
2192	(void) sigaction(LVL2, &act, NULL);
2193	(void) sigaction(LVL3, &act, NULL);
2194	(void) sigaction(LVL4, &act, NULL);
2195	(void) sigaction(LVL5, &act, NULL);
2196	(void) sigaction(LVL6, &act, NULL);
2197	(void) sigaction(SINGLE_USER, &act, NULL);
2198	(void) sigaction(LVLa, &act, NULL);
2199	(void) sigaction(LVLb, &act, NULL);
2200	(void) sigaction(LVLc, &act, NULL);
2201
2202	(void) sigset(SIGALRM, alarmclk);
2203	alarmclk();
2204
2205	(void) sigset(SIGCLD, childeath);
2206	(void) sigset(SIGPWR, powerfail);
2207}
2208
2209/*
2210 * Set up pipe for "godchildren". If the file exists and is a pipe just open
2211 * it. Else, if the file system is r/w create it.  Otherwise, defer its
2212 * creation and open until after /var/run has been mounted.  This function is
2213 * only called on startup and when explicitly requested via LVLQ.
2214 */
2215void
2216setup_pipe()
2217{
2218	struct stat stat_buf;
2219	struct statvfs statvfs_buf;
2220	struct sigaction act;
2221
2222	/*
2223	 * Always close the previous pipe descriptor as the mounted filesystems
2224	 * may have changed.
2225	 */
2226	if (Pfd >= 0)
2227		(void) close(Pfd);
2228
2229	if ((stat(INITPIPE, &stat_buf) == 0) &&
2230	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2231		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2232	else
2233		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2234		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2235			(void) unlink(INITPIPE);
2236			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2237			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2238		}
2239
2240	if (Pfd >= 0) {
2241		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2242		/*
2243		 * Read pipe in message discard mode.
2244		 */
2245		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2246
2247		act.sa_handler = sigpoll;
2248		act.sa_flags = 0;
2249		(void) sigemptyset(&act.sa_mask);
2250		(void) sigaddset(&act.sa_mask, SIGCLD);
2251		(void) sigaction(SIGPOLL, &act, NULL);
2252	}
2253}
2254
2255/*
2256 * siglvl - handle an asynchronous signal from init(1M) telling us that we
2257 * should change the current run level.  We set new_state accordingly.
2258 */
2259void
2260siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2261{
2262	struct PROC_TABLE *process;
2263	struct sigaction act;
2264
2265	/*
2266	 * If the signal was from the kernel (rather than init(1M)) then init
2267	 * itself tripped the signal.  That is, we might have a bug and tripped
2268	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2269	 * such a case we reset the disposition to SIG_DFL, block all signals
2270	 * in uc_mask but the current one, and return to the interrupted ucp
2271	 * to effect an appropriate death.  The kernel will then restart us.
2272	 *
2273	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2274	 * the kernel can send us when it wants to effect an orderly reboot.
2275	 * For this case we must also verify si_code is zero, rather than a
2276	 * code such as FPE_INTDIV which a bug might have triggered.
2277	 */
2278	if (sip != NULL && SI_FROMKERNEL(sip) &&
2279	    (sig != SIGFPE || sip->si_code == 0)) {
2280
2281		(void) sigemptyset(&act.sa_mask);
2282		act.sa_handler = SIG_DFL;
2283		act.sa_flags = 0;
2284		(void) sigaction(sig, &act, NULL);
2285
2286		(void) sigfillset(&ucp->uc_sigmask);
2287		(void) sigdelset(&ucp->uc_sigmask, sig);
2288		ucp->uc_flags |= UC_SIGMASK;
2289
2290		(void) setcontext(ucp);
2291	}
2292
2293	/*
2294	 * If the signal received is a LVLQ signal, do not really
2295	 * change levels, just restate the current level.  If the
2296	 * signal is not a LVLQ, set the new level to the signal
2297	 * received.
2298	 */
2299	if (sig == LVLQ) {
2300		new_state = cur_state;
2301		lvlq_received = B_TRUE;
2302	} else {
2303		new_state = sig;
2304	}
2305
2306	/*
2307	 * Clear all times and repeat counts in the process table
2308	 * since either the level is changing or the user has editted
2309	 * the inittab file and wants us to look at it again.
2310	 * If the user has fixed a typo, we don't want residual timing
2311	 * data preventing the fixed command line from executing.
2312	 */
2313	for (process = proc_table;
2314	    (process < proc_table + num_proc); process++) {
2315		process->p_time = 0L;
2316		process->p_count = 0;
2317	}
2318
2319	/*
2320	 * Set the flag to indicate that a "user signal" was received.
2321	 */
2322	wakeup.w_flags.w_usersignal = 1;
2323}
2324
2325
2326/*
2327 * alarmclk
2328 */
2329static void
2330alarmclk()
2331{
2332	time_up = TRUE;
2333}
2334
2335/*
2336 * childeath_single():
2337 *
2338 * This used to be the SIGCLD handler and it was set with signal()
2339 * (as opposed to sigset()).  When a child exited we'd come to the
2340 * handler, wait for the child, and reenable the handler with
2341 * signal() just before returning.  The implementation of signal()
2342 * checks with waitid() for waitable children and sends a SIGCLD
2343 * if there are some.  If children are exiting faster than the
2344 * handler can run we keep sending signals and the handler never
2345 * gets to return and eventually the stack runs out and init dies.
2346 * To prevent that we set the handler with sigset() so the handler
2347 * doesn't need to be reset, and in childeath() (see below) we
2348 * call childeath_single() as long as there are children to be
2349 * waited for.  If a child exits while init is in the handler a
2350 * SIGCLD will be pending and delivered on return from the handler.
2351 * If the child was already waited for the handler will have nothing
2352 * to do and return, otherwise the child will be waited for.
2353 */
2354static void
2355childeath_single(pid_t pid, int status)
2356{
2357	struct PROC_TABLE	*process;
2358	struct pidlist		*pp;
2359
2360	/*
2361	 * Scan the process table to see if we are interested in this process.
2362	 */
2363	for (process = proc_table;
2364	    (process < proc_table + num_proc); process++) {
2365		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2366		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2367
2368			/*
2369			 * Mark this process as having died and store the exit
2370			 * status.  Also set the wakeup flag for a dead child
2371			 * and break out of the loop.
2372			 */
2373			process->p_flags &= ~LIVING;
2374			process->p_exit = (short)status;
2375			wakeup.w_flags.w_childdeath = 1;
2376
2377			return;
2378		}
2379	}
2380
2381	/*
2382	 * No process was found above, look through auxiliary list.
2383	 */
2384	(void) sighold(SIGPOLL);
2385	pp = Plhead;
2386	while (pp) {
2387		if (pid > pp->pl_pid) {
2388			/*
2389			 * Keep on looking.
2390			 */
2391			pp = pp->pl_next;
2392			continue;
2393		} else if (pid < pp->pl_pid) {
2394			/*
2395			 * Not in the list.
2396			 */
2397			break;
2398		} else {
2399			/*
2400			 * This is a dead "godchild".
2401			 */
2402			pp->pl_dflag = 1;
2403			pp->pl_exit = (short)status;
2404			wakeup.w_flags.w_childdeath = 1;
2405			Gchild = 1;	/* Notice to call cleanaux(). */
2406			break;
2407		}
2408	}
2409
2410	(void) sigrelse(SIGPOLL);
2411}
2412
/*
 * childeath(): SIGCLD handler.  Collects every child that can be waited
 * for without blocking and hands each one to childeath_single().
 */
/* ARGSUSED */
static void
childeath(int signo)
{
	pid_t dead;
	int wstat;

	for (;;) {
		dead = waitpid(-1, &wstat, WNOHANG);
		if (dead <= 0)
			break;
		childeath_single(dead, wstat);
	}
}
2423
2424static void
2425powerfail()
2426{
2427	(void) nice(-19);
2428	wakeup.w_flags.w_powerhit = 1;
2429}
2430
2431/*
2432 * efork() forks a child and the parent inserts the process in its table
2433 * of processes that are directly a result of forks that it has performed.
2434 * The child just changes the "global" with the process id for this process
2435 * to it's new value.
2436 * If efork() is called with a pointer into the proc_table it uses that slot,
2437 * otherwise it searches for a free slot.  Regardless of how it was called,
2438 * it returns the pointer to the proc_table entry
2439 *
2440 * The SIGCLD signal is blocked (held) before calling efork()
2441 * and is unblocked (released) after efork() returns.
2442 *
2443 * Ideally, this should be rewritten to use modern signal semantics.
2444 */
2445static struct PROC_TABLE *
2446efork(int action, struct PROC_TABLE *process, int modes)
2447{
2448	pid_t	childpid;
2449	struct PROC_TABLE *proc;
2450	int		i;
2451	/*
2452	 * Freshen up the proc_table, removing any entries for dead processes
2453	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2454	 */
2455	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2456		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2457		    (OCCUPIED)) {
2458			/*
2459			 * Is this a named process?
2460			 * If so, do the necessary bookkeeping.
2461			 */
2462			if (proc->p_flags & NAMED)
2463				(void) account(DEAD_PROCESS, proc, NULL);
2464
2465			/*
2466			 * Free this entry for new usage.
2467			 */
2468			proc->p_flags = 0;
2469		}
2470	}
2471
2472	while ((childpid = fork()) == FAILURE) {
2473		/*
2474		 * Shorten the alarm timer in case someone else's child dies
2475		 * and free up a slot in the process table.
2476		 */
2477		setimer(5);
2478
2479		/*
2480		 * Wait for some children to die.  Since efork()
2481		 * is always called with SIGCLD blocked, unblock
2482		 * it here so that child death signals can come in.
2483		 */
2484		(void) sigrelse(SIGCLD);
2485		(void) pause();
2486		(void) sighold(SIGCLD);
2487		setimer(0);
2488	}
2489
2490	if (childpid != 0) {
2491
2492		if (process == NULLPROC) {
2493			/*
2494			 * No proc table pointer specified so search
2495			 * for a free slot.
2496			 */
2497			for (process = proc_table;  process->p_flags != 0 &&
2498			    (process < proc_table + num_proc); process++)
2499					;
2500
2501			if (process == (proc_table + num_proc)) {
2502				int old_proc_table_size = num_proc;
2503
2504				/* Increase the process table size */
2505				increase_proc_table_size();
2506				if (old_proc_table_size == num_proc) {
2507					/* didn't grow: memory failure */
2508					return (NO_ROOM);
2509				} else {
2510					process =
2511					    proc_table + old_proc_table_size;
2512				}
2513			}
2514
2515			process->p_time = 0L;
2516			process->p_count = 0;
2517		}
2518		process->p_id[0] = '\0';
2519		process->p_id[1] = '\0';
2520		process->p_id[2] = '\0';
2521		process->p_id[3] = '\0';
2522		process->p_pid = childpid;
2523		process->p_flags = (LIVING | OCCUPIED | modes);
2524		process->p_exit = 0;
2525
2526		st_write();
2527	} else {
2528		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2529			(void) setpgrp();
2530
2531		process = NULLPROC;
2532
2533		/*
2534		 * Reset all signals to the system defaults.
2535		 */
2536		for (i = SIGHUP; i <= SIGRTMAX; i++)
2537			(void) sigset(i, SIG_DFL);
2538
2539		/*
2540		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2541		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2542		 *
2543		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2544		 * for backward compatibility.
2545		 */
2546		(void) sigset(SIGTTIN, SIG_IGN);
2547		(void) sigset(SIGTTOU, SIG_IGN);
2548		(void) sigset(SIGTSTP, SIG_IGN);
2549		(void) sigset(SIGXCPU, SIG_IGN);
2550		(void) sigset(SIGXFSZ, SIG_IGN);
2551	}
2552	return (process);
2553}
2554
2555
2556/*
2557 * waitproc() waits for a specified process to die.  For this function to
2558 * work, the specified process must already in the proc_table.  waitproc()
2559 * returns the exit status of the specified process when it dies.
2560 */
2561static long
2562waitproc(struct PROC_TABLE *process)
2563{
2564	int		answer;
2565	sigset_t	oldmask, newmask, zeromask;
2566
2567	(void) sigemptyset(&zeromask);
2568	(void) sigemptyset(&newmask);
2569
2570	(void) sigaddset(&newmask, SIGCLD);
2571
2572	/* Block SIGCLD and save the current signal mask */
2573	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2574		perror("SIG_BLOCK error");
2575
2576	/*
2577	 * Wait around until the process dies.
2578	 */
2579	if (process->p_flags & LIVING)
2580		(void) sigsuspend(&zeromask);
2581
2582	/* Reset signal mask to unblock SIGCLD */
2583	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2584		perror("SIG_SETMASK error");
2585
2586	if (process->p_flags & LIVING)
2587		return (FAILURE);
2588
2589	/*
2590	 * Make sure to only return 16 bits so that answer will always
2591	 * be positive whenever the process of interest really died.
2592	 */
2593	answer = (process->p_exit & 0xffff);
2594
2595	/*
2596	 * Free the slot in the proc_table.
2597	 */
2598	process->p_flags = 0;
2599	return (answer);
2600}
2601
2602/*
2603 * notify_pam_dead(): calls into the PAM framework to close the given session.
2604 */
2605static void
2606notify_pam_dead(struct utmpx *up)
2607{
2608	pam_handle_t *pamh;
2609	char user[sizeof (up->ut_user) + 1];
2610	char ttyn[sizeof (up->ut_line) + 1];
2611	char host[sizeof (up->ut_host) + 1];
2612
2613	/*
2614	 * PAM does not take care of updating utmpx/wtmpx.
2615	 */
2616	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2617	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2618	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2619
2620	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2621		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2622		(void) pam_set_item(pamh, PAM_RHOST, host);
2623		(void) pam_close_session(pamh, 0);
2624		(void) pam_end(pamh, PAM_SUCCESS);
2625	}
2626}
2627
2628/*
2629 * Check you can access utmpx (As / may be read-only and
2630 * /var may not be mounted yet).
2631 */
2632static int
2633access_utmpx(void)
2634{
2635	do {
2636		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2637	} while (!utmpx_ok && errno == EINTR);
2638
2639	return (utmpx_ok);
2640}
2641
2642/*
2643 * account() updates entries in utmpx and appends new entries to the end of
2644 * wtmpx (assuming they exist).  The program argument indicates the name of
2645 * program if INIT_PROCESS, otherwise should be NULL.
2646 *
2647 * account() only blocks for INIT_PROCESS requests.
2648 *
2649 * Returns non-zero if write failed.
2650 */
2651static int
2652account(short state, struct PROC_TABLE *process, char *program)
2653{
2654	struct utmpx utmpbuf, *u, *oldu;
2655	int tmplen;
2656	char fail_buf[UT_LINE_SZ];
2657	sigset_t block, unblock;
2658
2659	if (!utmpx_ok && !access_utmpx()) {
2660		return (-1);
2661	}
2662
2663	/*
2664	 * Set up the prototype for the utmp structure we want to write.
2665	 */
2666	u = &utmpbuf;
2667	(void) memset(u, 0, sizeof (struct utmpx));
2668
2669	/*
2670	 * Fill in the various fields of the utmp structure.
2671	 */
2672	u->ut_id[0] = process->p_id[0];
2673	u->ut_id[1] = process->p_id[1];
2674	u->ut_id[2] = process->p_id[2];
2675	u->ut_id[3] = process->p_id[3];
2676	u->ut_pid = process->p_pid;
2677
2678	/*
2679	 * Fill the "ut_exit" structure.
2680	 */
2681	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2682	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2683	u->ut_type = state;
2684
2685	(void) time(&u->ut_tv.tv_sec);
2686
2687	/*
2688	 * Block signals for utmp update.
2689	 */
2690	(void) sigfillset(&block);
2691	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2692
2693	/*
2694	 * See if there already is such an entry in the "utmpx" file.
2695	 */
2696	setutxent();	/* Start at beginning of utmpx file. */
2697
2698	if ((oldu = getutxid(u)) != NULL) {
2699		/*
2700		 * Copy in the old "user", "line" and "host" fields
2701		 * to our new structure.
2702		 */
2703		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2704		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2705		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2706		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2707		    min(tmplen + 1, sizeof (u->ut_host)) : 0;
2708
2709		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2710			notify_pam_dead(oldu);
2711		}
2712	}
2713
2714	/*
2715	 * Perform special accounting. Insert the special string into the
2716	 * ut_line array. For INIT_PROCESSes put in the name of the
2717	 * program in the "ut_user" field.
2718	 */
2719	switch (state) {
2720	case INIT_PROCESS:
2721		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2722		(void) strcpy(fail_buf, "INIT_PROCESS");
2723		break;
2724
2725	default:
2726		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2727		break;
2728	}
2729
2730	/*
2731	 * Write out the updated entry to utmpx file.
2732	 */
2733	if (pututxline(u) == NULL) {
2734		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2735		    fail_buf, strerror(errno));
2736		endutxent();
2737		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2738		return (-1);
2739	}
2740
2741	/*
2742	 * If we're able to write to utmpx, then attempt to add to the
2743	 * end of the wtmpx file.
2744	 */
2745	updwtmpx(WTMPX, u);
2746
2747	endutxent();
2748
2749	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2750
2751	return (0);
2752}
2753
2754static void
2755clearent(pid_t pid, short status)
2756{
2757	struct utmpx *up;
2758	sigset_t block, unblock;
2759
2760	/*
2761	 * Block signals for utmp update.
2762	 */
2763	(void) sigfillset(&block);
2764	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2765
2766	/*
2767	 * No error checking for now.
2768	 */
2769
2770	setutxent();
2771	while (up = getutxent()) {
2772		if (up->ut_pid == pid) {
2773			if (up->ut_type == DEAD_PROCESS) {
2774				/*
2775				 * Cleaned up elsewhere.
2776				 */
2777				continue;
2778			}
2779
2780			notify_pam_dead(up);
2781
2782			up->ut_type = DEAD_PROCESS;
2783			up->ut_exit.e_termination = WTERMSIG(status);
2784			up->ut_exit.e_exit = WEXITSTATUS(status);
2785			(void) time(&up->ut_tv.tv_sec);
2786
2787			(void) pututxline(up);
2788			/*
2789			 * Now attempt to add to the end of the
2790			 * wtmp and wtmpx files.  Do not create
2791			 * if they don't already exist.
2792			 */
2793			updwtmpx(WTMPX, up);
2794
2795			break;
2796		}
2797	}
2798
2799	endutxent();
2800	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2801}
2802
/*
 * prog_name() searches for the word or unix path name and
 * returns a pointer to the last element of the pathname.
 *
 * Leading blanks and tabs are skipped.  If the first remaining character
 * is not '.', '/', '_', a letter or a digit, an empty string is returned.
 * Otherwise the result is the text after the last '/' of the first word,
 * truncated to _POSIX_LOGIN_NAME_MAX - 1 characters.  The result lives in
 * a static buffer that is overwritten by the next call.
 */
static char *
prog_name(char *string)
{
	/* XXX - utmp - fix name length */
	static char word[_POSIX_LOGIN_NAME_MAX];
	char	*base, *end;
	char	first;
	int	n;

	/*
	 * Step over leading spaces and tabs to the start of the first word.
	 */
	while (*string == ' ' || *string == '\t')
		string++;

	/*
	 * Reject anything that does not start like a word or a pathname.
	 */
	first = *string;
	if (!(first == '.' || first == '/' || first == '_' ||
	    (first >= 'a' && first <= 'z') ||
	    (first >= 'A' && first <= 'Z') ||
	    (first >= '0' && first <= '9')))
		return ("");

	/*
	 * Walk to the end of the word (' ', '\t', '\n' or '\0').  Every
	 * time a '/' is passed, "base" moves to the character after it, so
	 * it ends up at the start of the last pathname element.
	 */
	base = string;
	for (end = string; *end != ' ' && *end != '\t' &&
	    *end != '\n' && *end != '\0'; end++) {
		if (*end == '/')
			base = end + 1;
	}

	/*
	 * Copy as much of the last element as fits into "word" and
	 * terminate it.
	 */
	/* XXX - utmp - fix name length */
	for (n = 0; n < _POSIX_LOGIN_NAME_MAX - 1 && base < end; n++)
		word[n] = *base++;

	word[n] = '\0';
	return (word);
}
2855
2856
2857/*
2858 * realcon() returns a nonzero value if there is a character device
2859 * associated with SYSCON that has the same device number as CONSOLE.
2860 */
2861static int
2862realcon()
2863{
2864	struct stat sconbuf, conbuf;
2865
2866	if (stat(SYSCON, &sconbuf) != -1 &&
2867	    stat(CONSOLE, &conbuf) != -1 &&
2868	    S_ISCHR(sconbuf.st_mode) &&
2869	    S_ISCHR(conbuf.st_mode) &&
2870	    sconbuf.st_rdev == conbuf.st_rdev) {
2871		return (1);
2872	} else {
2873		return (0);
2874	}
2875}
2876
2877
2878/*
2879 * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2880 * Returns true if the IOCTLSYSCON file needs to be written (with
2881 * write_ioctl_syscon() below)
2882 */
2883static int
2884get_ioctl_syscon()
2885{
2886	FILE	*fp;
2887	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2888	int		i, valid_format = 0;
2889
2890	/*
2891	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2892	 */
2893	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2894		stored_syscon_termios = dflt_termios;
2895		console(B_TRUE,
2896		    "warning:%s does not exist, default settings assumed\n",
2897		    IOCTLSYSCON);
2898	} else {
2899
2900	    i = fscanf(fp,
2901	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2902		&iflags, &oflags, &cflags, &lflags,
2903		&cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2904		&cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2905		&cc[14], &cc[15], &cc[16], &cc[17]);
2906
2907	    if (i == 22) {
2908		stored_syscon_termios.c_iflag = iflags;
2909		stored_syscon_termios.c_oflag = oflags;
2910		stored_syscon_termios.c_cflag = cflags;
2911		stored_syscon_termios.c_lflag = lflags;
2912		for (i = 0; i < 18; i++)
2913			stored_syscon_termios.c_cc[i] = (char)cc[i];
2914		valid_format = 1;
2915	    } else if (i == 13) {
2916		rewind(fp);
2917		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2918		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2919		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2920
2921		/*
2922		 * If the file is formatted properly, use the values to
2923		 * initialize the console terminal condition.
2924		 */
2925		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2926		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2927		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2928		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2929		for (i = 0; i < 8; i++)
2930			stored_syscon_termios.c_cc[i] = (char)cc[i];
2931		valid_format = 1;
2932	    }
2933	    (void) fclose(fp);
2934
2935	    /* If the file is badly formatted, use the default settings. */
2936	    if (!valid_format)
2937		stored_syscon_termios = dflt_termios;
2938	}
2939
2940	/* If the file had a bad format, rewrite it later. */
2941	return (!valid_format);
2942}
2943
2944
2945static void
2946write_ioctl_syscon()
2947{
2948	FILE *fp;
2949	int i;
2950
2951	(void) unlink(SYSCON);
2952	(void) link(SYSTTY, SYSCON);
2953	(void) umask(022);
2954	fp = fopen(IOCTLSYSCON, "w");
2955
2956	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2957	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2958	    stored_syscon_termios.c_lflag);
2959	for (i = 0; i < 8; ++i)
2960		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2961	(void) putc('\n', fp);
2962
2963	(void) fflush(fp);
2964	(void) fsync(fileno(fp));
2965	(void) fclose(fp);
2966	(void) umask(cmask);
2967}
2968
2969
2970/*
2971 * void console(boolean_t, char *, ...)
2972 *   Outputs the requested message to the system console.  Note that the number
2973 *   of arguments passed to console() should be determined by the print format.
2974 *
2975 *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2976 *   message.
2977 *
2978 *   To make sure we write to the console in a sane fashion, we use the modes
2979 *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2980 *   Afterwards we restore whatever modes were already there.
2981 */
2982/* PRINTFLIKE2 */
2983static void
2984console(boolean_t prefix, char *format, ...)
2985{
2986	char	outbuf[BUFSIZ];
2987	va_list	args;
2988	int fd, getret;
2989	struct termios old_syscon_termios;
2990	FILE *f;
2991
2992	/*
2993	 * We open SYSCON anew each time in case it has changed (see
2994	 * userinit()).
2995	 */
2996	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
2997	    (f = fdopen(fd, "r+")) == NULL) {
2998		if (prefix)
2999			syslog(LOG_WARNING, "INIT: ");
3000		va_start(args, format);
3001		vsyslog(LOG_WARNING, format, args);
3002		va_end(args);
3003		if (fd >= 0)
3004			(void) close(fd);
3005		return;
3006	}
3007	setbuf(f, &outbuf[0]);
3008
3009	getret = tcgetattr(fd, &old_syscon_termios);
3010	old_syscon_termios.c_cflag &= ~HUPCL;
3011	if (realcon())
3012		/* Don't overwrite cflag of real console. */
3013		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3014
3015	stored_syscon_termios.c_cflag &= ~HUPCL;
3016
3017	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3018
3019	if (prefix)
3020		(void) fprintf(f, "\nINIT: ");
3021	va_start(args, format);
3022	(void) vfprintf(f, format, args);
3023	va_end(args);
3024
3025	if (getret == 0)
3026		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3027
3028	(void) fclose(f);
3029}
3030
3031/*
3032 * timer() is a substitute for sleep() which uses alarm() and pause().
3033 */
3034static void
3035timer(int waitime)
3036{
3037	setimer(waitime);
3038	while (time_up == FALSE)
3039		(void) pause();
3040}
3041
3042static void
3043setimer(int timelimit)
3044{
3045	alarmclk();
3046	(void) alarm(timelimit);
3047	time_up = (timelimit ? FALSE : TRUE);
3048}
3049
3050/*
3051 * Fails with
3052 *   ENOMEM - out of memory
3053 *   ECONNABORTED - repository connection broken
3054 *   EPERM - permission denied
3055 *   EACCES - backend access denied
3056 *   EROFS - backend readonly
3057 */
3058static int
3059get_or_add_startd(scf_instance_t *inst)
3060{
3061	scf_handle_t *h;
3062	scf_scope_t *scope = NULL;
3063	scf_service_t *svc = NULL;
3064	int ret = 0;
3065
3066	h = scf_instance_handle(inst);
3067
3068	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
3069	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
3070		return (0);
3071
3072	switch (scf_error()) {
3073	case SCF_ERROR_CONNECTION_BROKEN:
3074		return (ECONNABORTED);
3075
3076	case SCF_ERROR_NOT_FOUND:
3077		break;
3078
3079	case SCF_ERROR_HANDLE_MISMATCH:
3080	case SCF_ERROR_INVALID_ARGUMENT:
3081	case SCF_ERROR_CONSTRAINT_VIOLATED:
3082	default:
3083		bad_error("scf_handle_decode_fmri", scf_error());
3084	}
3085
3086	/* Make sure we're right, since we're adding piece-by-piece. */
3087	assert(strcmp(SCF_SERVICE_STARTD,
3088	    "svc:/system/svc/restarter:default") == 0);
3089
3090	if ((scope = scf_scope_create(h)) == NULL ||
3091	    (svc = scf_service_create(h)) == NULL) {
3092		ret = ENOMEM;
3093		goto out;
3094	}
3095
3096get_scope:
3097	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
3098		switch (scf_error()) {
3099		case SCF_ERROR_CONNECTION_BROKEN:
3100			ret = ECONNABORTED;
3101			goto out;
3102
3103		case SCF_ERROR_NOT_FOUND:
3104			(void) fputs(gettext(
3105			    "smf(5) repository missing local scope.\n"),
3106			    stderr);
3107			exit(1);
3108			/* NOTREACHED */
3109
3110		case SCF_ERROR_HANDLE_MISMATCH:
3111		case SCF_ERROR_INVALID_ARGUMENT:
3112		default:
3113			bad_error("scf_handle_get_scope", scf_error());
3114		}
3115	}
3116
3117get_svc:
3118	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
3119		switch (scf_error()) {
3120		case SCF_ERROR_CONNECTION_BROKEN:
3121			ret = ECONNABORTED;
3122			goto out;
3123
3124		case SCF_ERROR_DELETED:
3125			goto get_scope;
3126
3127		case SCF_ERROR_NOT_FOUND:
3128			break;
3129
3130		case SCF_ERROR_HANDLE_MISMATCH:
3131		case SCF_ERROR_INVALID_ARGUMENT:
3132		case SCF_ERROR_NOT_SET:
3133		default:
3134			bad_error("scf_scope_get_service", scf_error());
3135		}
3136
3137add_svc:
3138		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
3139		    0) {
3140			switch (scf_error()) {
3141			case SCF_ERROR_CONNECTION_BROKEN:
3142				ret = ECONNABORTED;
3143				goto out;
3144
3145			case SCF_ERROR_EXISTS:
3146				goto get_svc;
3147
3148			case SCF_ERROR_PERMISSION_DENIED:
3149				ret = EPERM;
3150				goto out;
3151
3152			case SCF_ERROR_BACKEND_ACCESS:
3153				ret = EACCES;
3154				goto out;
3155
3156			case SCF_ERROR_BACKEND_READONLY:
3157				ret = EROFS;
3158				goto out;
3159
3160			case SCF_ERROR_HANDLE_MISMATCH:
3161			case SCF_ERROR_INVALID_ARGUMENT:
3162			case SCF_ERROR_NOT_SET:
3163			default:
3164				bad_error("scf_scope_add_service", scf_error());
3165			}
3166		}
3167	}
3168
3169get_inst:
3170	if (scf_service_get_instance(svc, "default", inst) != 0) {
3171		switch (scf_error()) {
3172		case SCF_ERROR_CONNECTION_BROKEN:
3173			ret = ECONNABORTED;
3174			goto out;
3175
3176		case SCF_ERROR_DELETED:
3177			goto add_svc;
3178
3179		case SCF_ERROR_NOT_FOUND:
3180			break;
3181
3182		case SCF_ERROR_HANDLE_MISMATCH:
3183		case SCF_ERROR_INVALID_ARGUMENT:
3184		case SCF_ERROR_NOT_SET:
3185		default:
3186			bad_error("scf_service_get_instance", scf_error());
3187		}
3188
3189		if (scf_service_add_instance(svc, "default", inst) !=
3190		    0) {
3191			switch (scf_error()) {
3192			case SCF_ERROR_CONNECTION_BROKEN:
3193				ret = ECONNABORTED;
3194				goto out;
3195
3196			case SCF_ERROR_DELETED:
3197				goto add_svc;
3198
3199			case SCF_ERROR_EXISTS:
3200				goto get_inst;
3201
3202			case SCF_ERROR_PERMISSION_DENIED:
3203				ret = EPERM;
3204				goto out;
3205
3206			case SCF_ERROR_BACKEND_ACCESS:
3207				ret = EACCES;
3208				goto out;
3209
3210			case SCF_ERROR_BACKEND_READONLY:
3211				ret = EROFS;
3212				goto out;
3213
3214			case SCF_ERROR_HANDLE_MISMATCH:
3215			case SCF_ERROR_INVALID_ARGUMENT:
3216			case SCF_ERROR_NOT_SET:
3217			default:
3218				bad_error("scf_service_add_instance",
3219				    scf_error());
3220			}
3221		}
3222	}
3223
3224	ret = 0;
3225
3226out:
3227	scf_service_destroy(svc);
3228	scf_scope_destroy(scope);
3229	return (ret);
3230}
3231
3232/*
3233 * Fails with
3234 *   ECONNABORTED - repository connection broken
3235 *   ECANCELED - the transaction's property group was deleted
3236 */
3237static int
3238transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3239    const char *pname, scf_type_t type)
3240{
3241change_type:
3242	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3243		return (0);
3244
3245	switch (scf_error()) {
3246	case SCF_ERROR_CONNECTION_BROKEN:
3247		return (ECONNABORTED);
3248
3249	case SCF_ERROR_DELETED:
3250		return (ECANCELED);
3251
3252	case SCF_ERROR_NOT_FOUND:
3253		goto new;
3254
3255	case SCF_ERROR_HANDLE_MISMATCH:
3256	case SCF_ERROR_INVALID_ARGUMENT:
3257	case SCF_ERROR_NOT_BOUND:
3258	case SCF_ERROR_NOT_SET:
3259	default:
3260		bad_error("scf_transaction_property_change_type", scf_error());
3261	}
3262
3263new:
3264	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3265		return (0);
3266
3267	switch (scf_error()) {
3268	case SCF_ERROR_CONNECTION_BROKEN:
3269		return (ECONNABORTED);
3270
3271	case SCF_ERROR_DELETED:
3272		return (ECANCELED);
3273
3274	case SCF_ERROR_EXISTS:
3275		goto change_type;
3276
3277	case SCF_ERROR_HANDLE_MISMATCH:
3278	case SCF_ERROR_INVALID_ARGUMENT:
3279	case SCF_ERROR_NOT_BOUND:
3280	case SCF_ERROR_NOT_SET:
3281	default:
3282		bad_error("scf_transaction_property_new", scf_error());
3283		/* NOTREACHED */
3284	}
3285}
3286
3287static void
3288scferr(void)
3289{
3290	switch (scf_error()) {
3291	case SCF_ERROR_NO_MEMORY:
3292		console(B_TRUE, gettext("Out of memory.\n"));
3293		break;
3294
3295	case SCF_ERROR_CONNECTION_BROKEN:
3296		console(B_TRUE, gettext(
3297		    "Connection to smf(5) repository server broken.\n"));
3298		break;
3299
3300	case SCF_ERROR_NO_RESOURCES:
3301		console(B_TRUE, gettext(
3302		    "smf(5) repository server is out of memory.\n"));
3303		break;
3304
3305	case SCF_ERROR_PERMISSION_DENIED:
3306		console(B_TRUE, gettext("Insufficient privileges.\n"));
3307		break;
3308
3309	default:
3310		console(B_TRUE, gettext("libscf error: %s\n"),
3311		    scf_strerror(scf_error()));
3312	}
3313}
3314
3315static void
3316lscf_set_runlevel(char rl)
3317{
3318	scf_handle_t *h;
3319	scf_instance_t *inst = NULL;
3320	scf_propertygroup_t *pg = NULL;
3321	scf_transaction_t *tx = NULL;
3322	scf_transaction_entry_t *ent = NULL;
3323	scf_value_t *val = NULL;
3324	char buf[2];
3325	int r;
3326
3327	h = scf_handle_create(SCF_VERSION);
3328	if (h == NULL) {
3329		scferr();
3330		return;
3331	}
3332
3333	if (scf_handle_bind(h) != 0) {
3334		switch (scf_error()) {
3335		case SCF_ERROR_NO_SERVER:
3336			console(B_TRUE,
3337			    gettext("smf(5) repository server not running.\n"));
3338			goto bail;
3339
3340		default:
3341			scferr();
3342			goto bail;
3343		}
3344	}
3345
3346	if ((inst = scf_instance_create(h)) == NULL ||
3347	    (pg = scf_pg_create(h)) == NULL ||
3348	    (val = scf_value_create(h)) == NULL ||
3349	    (tx = scf_transaction_create(h)) == NULL ||
3350	    (ent = scf_entry_create(h)) == NULL) {
3351		scferr();
3352		goto bail;
3353	}
3354
3355get_inst:
3356	r = get_or_add_startd(inst);
3357	switch (r) {
3358	case 0:
3359		break;
3360
3361	case ENOMEM:
3362	case ECONNABORTED:
3363	case EPERM:
3364	case EACCES:
3365	case EROFS:
3366		scferr();
3367		goto bail;
3368	default:
3369		bad_error("get_or_add_startd", r);
3370	}
3371
3372get_pg:
3373	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
3374		switch (scf_error()) {
3375		case SCF_ERROR_CONNECTION_BROKEN:
3376			scferr();
3377			goto bail;
3378
3379		case SCF_ERROR_DELETED:
3380			goto get_inst;
3381
3382		case SCF_ERROR_NOT_FOUND:
3383			break;
3384
3385		case SCF_ERROR_HANDLE_MISMATCH:
3386		case SCF_ERROR_INVALID_ARGUMENT:
3387		case SCF_ERROR_NOT_SET:
3388		default:
3389			bad_error("scf_instance_get_pg", scf_error());
3390		}
3391
3392add_pg:
3393		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
3394		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
3395		    0) {
3396			switch (scf_error()) {
3397			case SCF_ERROR_CONNECTION_BROKEN:
3398			case SCF_ERROR_PERMISSION_DENIED:
3399			case SCF_ERROR_BACKEND_ACCESS:
3400				scferr();
3401				goto bail;
3402
3403			case SCF_ERROR_DELETED:
3404				goto get_inst;
3405
3406			case SCF_ERROR_EXISTS:
3407				goto get_pg;
3408
3409			case SCF_ERROR_HANDLE_MISMATCH:
3410			case SCF_ERROR_INVALID_ARGUMENT:
3411			case SCF_ERROR_NOT_SET:
3412			default:
3413				bad_error("scf_instance_add_pg", scf_error());
3414			}
3415		}
3416	}
3417
3418	buf[0] = rl;
3419	buf[1] = '\0';
3420	r = scf_value_set_astring(val, buf);
3421	assert(r == 0);
3422
3423	for (;;) {
3424		if (scf_transaction_start(tx, pg) != 0) {
3425			switch (scf_error()) {
3426			case SCF_ERROR_CONNECTION_BROKEN:
3427			case SCF_ERROR_PERMISSION_DENIED:
3428			case SCF_ERROR_BACKEND_ACCESS:
3429				scferr();
3430				goto bail;
3431
3432			case SCF_ERROR_DELETED:
3433				goto add_pg;
3434
3435			case SCF_ERROR_HANDLE_MISMATCH:
3436			case SCF_ERROR_NOT_BOUND:
3437			case SCF_ERROR_IN_USE:
3438			case SCF_ERROR_NOT_SET:
3439			default:
3440				bad_error("scf_transaction_start", scf_error());
3441			}
3442		}
3443
3444		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
3445		switch (r) {
3446		case 0:
3447			break;
3448
3449		case ECONNABORTED:
3450			scferr();
3451			goto bail;
3452
3453		case ECANCELED:
3454			scf_transaction_reset(tx);
3455			goto add_pg;
3456
3457		default:
3458			bad_error("transaction_add_set", r);
3459		}
3460
3461		r = scf_entry_add_value(ent, val);
3462		assert(r == 0);
3463
3464		r = scf_transaction_commit(tx);
3465		if (r == 1)
3466			break;
3467
3468		if (r != 0) {
3469			switch (scf_error()) {
3470			case SCF_ERROR_CONNECTION_BROKEN:
3471			case SCF_ERROR_PERMISSION_DENIED:
3472			case SCF_ERROR_BACKEND_ACCESS:
3473			case SCF_ERROR_BACKEND_READONLY:
3474				scferr();
3475				goto bail;
3476
3477			case SCF_ERROR_DELETED:
3478				scf_transaction_reset(tx);
3479				goto add_pg;
3480
3481			case SCF_ERROR_INVALID_ARGUMENT:
3482			case SCF_ERROR_NOT_BOUND:
3483			case SCF_ERROR_NOT_SET:
3484			default:
3485				bad_error("scf_transaction_commit",
3486				    scf_error());
3487			}
3488		}
3489
3490		scf_transaction_reset(tx);
3491		(void) scf_pg_update(pg);
3492	}
3493
3494bail:
3495	scf_transaction_destroy(tx);
3496	scf_entry_destroy(ent);
3497	scf_value_destroy(val);
3498	scf_pg_destroy(pg);
3499	scf_instance_destroy(inst);
3500
3501	(void) scf_handle_unbind(h);
3502	scf_handle_destroy(h);
3503}
3504
3505/*
3506 * Function to handle requests from users to main init running as process 1.
3507 */
3508static void
3509userinit(int argc, char **argv)
3510{
3511	FILE	*fp;
3512	char	*ln;
3513	int	init_signal;
3514	struct stat	sconbuf, conbuf;
3515	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3516
3517	/*
3518	 * We are a user invoked init.  Is there an argument and is it
3519	 * a single character?  If not, print usage message and quit.
3520	 */
3521	if (argc != 2 || argv[1][1] != '\0') {
3522		(void) fprintf(stderr, usage_msg);
3523		exit(0);
3524	}
3525
3526	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3527		(void) fprintf(stderr, usage_msg);
3528		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3529		    argv[1]);
3530		exit(1);
3531	}
3532
3533	if (init_signal == SINGLE_USER) {
3534		/*
3535		 * Make sure this process is talking to a legal tty line
3536		 * and that /dev/syscon is linked to this line.
3537		 */
3538		ln = ttyname(0);	/* Get the name of tty */
3539		if (ln == NULL) {
3540			(void) fprintf(stderr,
3541			    "Standard input not a tty line\n");
3542			(void) audit_put_record(ADT_FAILURE,
3543			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3544			exit(1);
3545		}
3546
3547		if ((stat(ln, &sconbuf) != -1) &&
3548		    (stat(SYSCON, &conbuf) == -1 ||
3549		    sconbuf.st_rdev != conbuf.st_rdev)) {
3550			/*
3551			 * /dev/syscon needs to change.
3552			 * Unlink /dev/syscon and relink it to the current line.
3553			 */
3554			if (lstat(SYSCON, &conbuf) != -1 &&
3555			    unlink(SYSCON) == FAILURE) {
3556				perror("Can't unlink /dev/syscon");
3557				(void) fprintf(stderr,
3558				    "Run command on the system console.\n");
3559				(void) audit_put_record(ADT_FAILURE,
3560				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3561				exit(1);
3562			}
3563			if (symlink(ln, SYSCON) == FAILURE) {
3564				(void) fprintf(stderr,
3565				    "Can't symlink /dev/syscon to %s: %s", ln,
3566				    strerror(errno));
3567
3568				/* Try to leave a syscon */
3569				(void) link(SYSTTY, SYSCON);
3570				(void) audit_put_record(ADT_FAILURE,
3571				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3572				exit(1);
3573			}
3574
3575			/*
3576			 * Try to leave a message on system console saying where
3577			 * /dev/syscon is currently connected.
3578			 */
3579			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3580				(void) fprintf(fp,
3581				    "\n****	SYSCON CHANGED TO %s	****\n",
3582				    ln);
3583				(void) fclose(fp);
3584			}
3585		}
3586	}
3587
3588	update_boot_archive(init_signal);
3589
3590	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3591
3592	/*
3593	 * Signal init; init will take care of telling svc.startd.
3594	 */
3595	if (kill(init_pid, init_signal) == FAILURE) {
3596		(void) fprintf(stderr, "Must be super-user\n");
3597		(void) audit_put_record(ADT_FAILURE,
3598		    ADT_FAIL_VALUE_AUTH, argv[1]);
3599		exit(1);
3600	}
3601
3602	exit(0);
3603}
3604
3605
3606#define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3607
3608/* ARGSUSED */
3609void
3610sigpoll(int n)
3611{
3612	struct pidrec prec;
3613	struct pidrec *p = &prec;
3614	struct pidlist *plp;
3615	struct pidlist *tp, *savetp;
3616	int i;
3617
3618	if (Pfd < 0) {
3619		return;
3620	}
3621
3622	for (;;) {
3623		/*
3624		 * Important Note: Either read will really fail (in which case
3625		 * return is all we can do) or will get EAGAIN (Pfd was opened
3626		 * O_NDELAY), in which case we also want to return.
3627		 * Always return from here!
3628		 */
3629		if (read(Pfd, p, sizeof (struct pidrec)) !=
3630						sizeof (struct pidrec)) {
3631			return;
3632		}
3633		switch (p->pd_type) {
3634
3635		case ADDPID:
3636			/*
3637			 * New "godchild", add to list.
3638			 */
3639			if (Plfree == NULL) {
3640				plp = (struct pidlist *)calloc(DELTA,
3641				    sizeof (struct pidlist));
3642				if (plp == NULL) {
3643					/* Can't save pid */
3644					break;
3645				}
3646				/*
3647				 * Point at 2nd record allocated, we'll use plp.
3648				 */
3649				tp = plp + 1;
3650				/*
3651				 * Link them into a chain.
3652				 */
3653				Plfree = tp;
3654				for (i = 0; i < DELTA - 2; i++) {
3655					tp->pl_next = tp + 1;
3656					tp++;
3657				}
3658			} else {
3659				plp = Plfree;
3660				Plfree = plp->pl_next;
3661			}
3662			plp->pl_pid = p->pd_pid;
3663			plp->pl_dflag = 0;
3664			plp->pl_next = NULL;
3665			/*
3666			 * Note - pid list is kept in increasing order of pids.
3667			 */
3668			if (Plhead == NULL) {
3669				Plhead = plp;
3670				/* Back up to read next record */
3671				break;
3672			} else {
3673				savetp = tp = Plhead;
3674				while (tp) {
3675					if (plp->pl_pid > tp->pl_pid) {
3676						savetp = tp;
3677						tp = tp->pl_next;
3678						continue;
3679					} else if (plp->pl_pid < tp->pl_pid) {
3680						if (tp == Plhead) {
3681							plp->pl_next = Plhead;
3682							Plhead = plp;
3683						} else {
3684							plp->pl_next =
3685							    savetp->pl_next;
3686							savetp->pl_next = plp;
3687						}
3688						break;
3689					} else {
3690						/* Already in list! */
3691						plp->pl_next = Plfree;
3692						Plfree = plp;
3693						break;
3694					}
3695				}
3696				if (tp == NULL) {
3697					/* Add to end of list */
3698					savetp->pl_next = plp;
3699				}
3700			}
3701			/* Back up to read next record. */
3702			break;
3703
3704		case REMPID:
3705			/*
3706			 * This one was handled by someone else,
3707			 * purge it from the list.
3708			 */
3709			if (Plhead == NULL) {
3710				/* Back up to read next record. */
3711				break;
3712			}
3713			savetp = tp = Plhead;
3714			while (tp) {
3715				if (p->pd_pid > tp->pl_pid) {
3716					/* Keep on looking. */
3717					savetp = tp;
3718					tp = tp->pl_next;
3719					continue;
3720				} else if (p->pd_pid < tp->pl_pid) {
3721					/* Not in list. */
3722					break;
3723				} else {
3724					/* Found it. */
3725					if (tp == Plhead)
3726						Plhead = tp->pl_next;
3727					else
3728						savetp->pl_next = tp->pl_next;
3729					tp->pl_next = Plfree;
3730					Plfree = tp;
3731					break;
3732				}
3733			}
3734			/* Back up to read next record. */
3735			break;
3736		default:
3737			console(B_TRUE, "Bad message on initpipe\n");
3738			break;
3739		}
3740	}
3741}
3742
3743
3744static void
3745cleanaux()
3746{
3747	struct pidlist *savep, *p;
3748	pid_t	pid;
3749	short	status;
3750
3751	(void) sighold(SIGCLD);
3752	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3753	(void) sighold(SIGPOLL);
3754	savep = p = Plhead;
3755	while (p) {
3756		if (p->pl_dflag) {
3757			/*
3758			 * Found an entry to delete,
3759			 * remove it from list first.
3760			 */
3761			pid = p->pl_pid;
3762			status = p->pl_exit;
3763			if (p == Plhead) {
3764				Plhead = p->pl_next;
3765				p->pl_next = Plfree;
3766				Plfree = p;
3767				savep = p = Plhead;
3768			} else {
3769				savep->pl_next = p->pl_next;
3770				p->pl_next = Plfree;
3771				Plfree = p;
3772				p = savep->pl_next;
3773			}
3774			clearent(pid, status);
3775			continue;
3776		}
3777		savep = p;
3778		p = p->pl_next;
3779	}
3780	(void) sigrelse(SIGPOLL);
3781	(void) sigrelse(SIGCLD);
3782}
3783
3784
3785/*
3786 * /etc/inittab has more entries and we have run out of room in the proc_table
3787 * array. Double the size of proc_table to accomodate the extra entries.
3788 */
3789static void
3790increase_proc_table_size()
3791{
3792	sigset_t block, unblock;
3793	void *ptr;
3794	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3795
3796
3797	/*
3798	 * Block signals for realloc.
3799	 */
3800	(void) sigfillset(&block);
3801	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3802
3803
3804	/*
3805	 * On failure we just return because callers of this function check
3806	 * for failure.
3807	 */
3808	do
3809		ptr = realloc(g_state, g_state_sz + delta);
3810	while (ptr == NULL && errno == EAGAIN);
3811
3812	if (ptr != NULL) {
3813		/* ensure that the new part is initialized to zero */
3814		bzero((caddr_t)ptr + g_state_sz, delta);
3815
3816		g_state = ptr;
3817		g_state_sz += delta;
3818		num_proc <<= 1;
3819	}
3820
3821
3822	/* unblock our signals before returning */
3823	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3824}
3825
3826
3827
3828/*
3829 * Sanity check g_state.
3830 */
3831static int
3832st_sane()
3833{
3834	int i;
3835	struct PROC_TABLE *ptp;
3836
3837
3838	/* Note: cur_state is encoded as a signal number */
3839	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3840		return (0);
3841
3842	/* Check num_proc */
3843	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3844	    sizeof (struct PROC_TABLE))
3845		return (0);
3846
3847	/* Check proc_table */
3848	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3849		/* skip unoccupied entries */
3850		if (!(ptp->p_flags & OCCUPIED))
3851			continue;
3852
3853		/* p_flags has no bits outside of PF_MASK */
3854		if (ptp->p_flags & ~(PF_MASK))
3855			return (0);
3856
3857		/* 5 <= pid <= MAXPID */
3858		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3859			return (0);
3860
3861		/* p_count >= 0 */
3862		if (ptp->p_count < 0)
3863			return (0);
3864
3865		/* p_time >= 0 */
3866		if (ptp->p_time < 0)
3867			return (0);
3868	}
3869
3870	return (1);
3871}
3872
3873/*
3874 * Initialize our state.
3875 *
3876 * If the system just booted, then init_state_file, which is located on an
3877 * everpresent tmpfs filesystem, should not exist.
3878 *
3879 * If we were restarted, then init_state_file should exist, in
3880 * which case we'll read it in, sanity check it, and use it.
3881 *
3882 * Note: You can't call console() until proc_table is ready.
3883 */
3884void
3885st_init()
3886{
3887	struct stat stb;
3888	int ret, st_fd, insane = 0;
3889	size_t to_be_read;
3890	char *ptr;
3891
3892
3893	booting = 1;
3894
3895	do {
3896		/*
3897		 * If we can exclusively create the file, then we're the
3898		 * initial invocation of init(1M).
3899		 */
3900		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3901		    S_IRUSR | S_IWUSR);
3902	} while (st_fd == -1 && errno == EINTR);
3903	if (st_fd != -1)
3904		goto new_state;
3905
3906	booting = 0;
3907
3908	do {
3909		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3910	} while (st_fd == -1 && errno == EINTR);
3911	if (st_fd == -1)
3912		goto new_state;
3913
3914	/* Get the size of the file. */
3915	do
3916		ret = fstat(st_fd, &stb);
3917	while (ret == -1 && errno == EINTR);
3918	if (ret == -1)
3919		goto new_state;
3920
3921	do
3922		g_state = malloc(stb.st_size);
3923	while (g_state == NULL && errno == EAGAIN);
3924	if (g_state == NULL)
3925		goto new_state;
3926
3927	to_be_read = stb.st_size;
3928	ptr = (char *)g_state;
3929	while (to_be_read > 0) {
3930		ssize_t read_ret;
3931
3932		read_ret = read(st_fd, ptr, to_be_read);
3933		if (read_ret < 0) {
3934			if (errno == EINTR)
3935				continue;
3936
3937			goto new_state;
3938		}
3939
3940		to_be_read -= read_ret;
3941		ptr += read_ret;
3942	}
3943
3944	(void) close(st_fd);
3945
3946	g_state_sz = stb.st_size;
3947
3948	if (st_sane()) {
3949		console(B_TRUE, "Restarting.\n");
3950		return;
3951	}
3952
3953	insane = 1;
3954
3955new_state:
3956	if (st_fd >= 0)
3957		(void) close(st_fd);
3958	else
3959		(void) unlink(init_state_file);
3960
3961	if (g_state != NULL)
3962		free(g_state);
3963
3964	/* Something went wrong, so allocate new state. */
3965	g_state_sz = sizeof (struct init_state) +
3966	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3967	do
3968		g_state = calloc(1, g_state_sz);
3969	while (g_state == NULL && errno == EAGAIN);
3970	if (g_state == NULL) {
3971		/* Fatal error! */
3972		exit(errno);
3973	}
3974
3975	g_state->ist_runlevel = -1;
3976	num_proc = init_num_proc;
3977
3978	if (!booting) {
3979		console(B_TRUE, "Restarting.\n");
3980
3981		/* Overwrite the bad state file. */
3982		st_write();
3983
3984		if (!insane) {
3985			console(B_TRUE,
3986			    "Error accessing persistent state file `%s'.  "
3987			    "Ignored.\n", init_state_file);
3988		} else {
3989			console(B_TRUE,
3990			    "Persistent state file `%s' is invalid and was "
3991			    "ignored.\n", init_state_file);
3992		}
3993	}
3994}
3995
3996/*
3997 * Write g_state out to the state file.
3998 */
3999void
4000st_write()
4001{
4002	static int complained = 0;
4003
4004	int st_fd;
4005	char *cp;
4006	size_t sz;
4007	ssize_t ret;
4008
4009
4010	do {
4011		st_fd = open(init_next_state_file,
4012		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4013	} while (st_fd < 0 && errno == EINTR);
4014	if (st_fd < 0)
4015		goto err;
4016
4017	cp = (char *)g_state;
4018	sz = g_state_sz;
4019	while (sz > 0) {
4020		ret = write(st_fd, cp, sz);
4021		if (ret < 0) {
4022			if (errno == EINTR)
4023				continue;
4024
4025			goto err;
4026		}
4027
4028		sz -= ret;
4029		cp += ret;
4030	}
4031
4032	(void) close(st_fd);
4033	st_fd = -1;
4034	if (rename(init_next_state_file, init_state_file)) {
4035		(void) unlink(init_next_state_file);
4036		goto err;
4037	}
4038	complained = 0;
4039
4040	return;
4041
4042err:
4043	if (st_fd >= 0)
4044		(void) close(st_fd);
4045
4046	if (!booting && !complained) {
4047		/*
4048		 * Only complain after the filesystem should have come up.
4049		 * And only do it once so we don't loop between console()
4050		 * & efork().
4051		 */
4052		complained = 1;
4053		if (st_fd)
4054			console(B_TRUE, "Couldn't write persistent state "
4055			    "file `%s'.\n", init_state_file);
4056		else
4057			console(B_TRUE, "Couldn't move persistent state "
4058			    "file `%s' to `%s'.\n", init_next_state_file,
4059			    init_state_file);
4060	}
4061}
4062
4063/*
4064 * Create a contract with these parameters.
4065 */
4066static int
4067contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4068    uint64_t cookie)
4069{
4070	int fd, err;
4071
4072	char *ioctl_tset_emsg =
4073	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4074
4075	do
4076		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4077	while (fd < 0 && errno == EINTR);
4078	if (fd < 0) {
4079		console(B_TRUE, "Couldn't create process template: %s.\n",
4080		    strerror(errno));
4081		return (-1);
4082	}
4083
4084	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4085		console(B_TRUE, "Contract set template inherit, regent "
4086		    "failed: %s.\n", strerror(err));
4087
4088	/*
4089	 * These errors result in a misconfigured template, which is better
4090	 * than no template at all, so warn but don't abort.
4091	 */
4092	if (err = ct_tmpl_set_informative(fd, info))
4093		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4094
4095	if (err = ct_tmpl_set_critical(fd, critical))
4096		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4097
4098	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4099		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4100
4101	if (err = ct_tmpl_set_cookie(fd, cookie))
4102		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4103
4104	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4105
4106	return (fd);
4107}
4108
4109/*
4110 * Create the templates and open an event file descriptor.  We use dup2(2) to
4111 * get these descriptors away from the stdin/stdout/stderr group.
4112 */
4113static void
4114contracts_init()
4115{
4116	int err, fd;
4117
4118	/*
4119	 * Create & configure a legacy template.  We only want empty events so
4120	 * we know when to abandon them.
4121	 */
4122	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4123	    ORDINARY_COOKIE);
4124	if (legacy_tmpl >= 0) {
4125		err = ct_tmpl_activate(legacy_tmpl);
4126		if (err != 0) {
4127			(void) close(legacy_tmpl);
4128			legacy_tmpl = -1;
4129			console(B_TRUE,
4130			    "Couldn't activate legacy template (%s); "
4131			    "legacy services will be in init's contract.\n",
4132			    strerror(err));
4133		}
4134	} else
4135		console(B_TRUE,
4136		    "Legacy services will be in init's contract.\n");
4137
4138	if (dup2(legacy_tmpl, 255) == -1) {
4139		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4140		    strerror(errno));
4141	} else {
4142		(void) close(legacy_tmpl);
4143		legacy_tmpl = 255;
4144	}
4145
4146	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4147
4148	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4149	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4150
4151	if (dup2(startd_tmpl, 254) == -1) {
4152		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4153		    strerror(errno));
4154	} else {
4155		(void) close(startd_tmpl);
4156		startd_tmpl = 254;
4157	}
4158
4159	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4160
4161	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4162		/* The creation errors have already been reported. */
4163		console(B_TRUE,
4164		    "Ignoring contract events.  Core smf(5) services will not "
4165		    "be restarted.\n");
4166		return;
4167	}
4168
4169	/*
4170	 * Open an event endpoint.
4171	 */
4172	do
4173		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4174	while (fd < 0 && errno == EINTR);
4175	if (fd < 0) {
4176		console(B_TRUE,
4177		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4178		    "will not be restarted.\n", strerror(errno));
4179		return;
4180	}
4181
4182	if (dup2(fd, 253) == -1) {
4183		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4184		    strerror(errno));
4185	} else {
4186		(void) close(fd);
4187		fd = 253;
4188	}
4189
4190	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4191
4192	/* Reset in case we've been restarted. */
4193	(void) ct_event_reset(fd);
4194
4195	poll_fds[0].fd = fd;
4196	poll_fds[0].events = POLLIN;
4197	poll_nfds = 1;
4198}
4199
4200static int
4201contract_getfile(ctid_t id, const char *name, int oflag)
4202{
4203	int fd;
4204
4205	do
4206		fd = contract_open(id, "process", name, oflag);
4207	while (fd < 0 && errno == EINTR);
4208
4209	if (fd < 0)
4210		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4211		    name, id, strerror(errno));
4212
4213	return (fd);
4214}
4215
4216static int
4217contract_cookie(ctid_t id, uint64_t *cp)
4218{
4219	int fd, err;
4220	ct_stathdl_t sh;
4221
4222	fd = contract_getfile(id, "status", O_RDONLY);
4223	if (fd < 0)
4224		return (-1);
4225
4226	err = ct_status_read(fd, CTD_COMMON, &sh);
4227	if (err != 0) {
4228		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4229		    id, strerror(err));
4230		(void) close(fd);
4231		return (-1);
4232	}
4233
4234	(void) close(fd);
4235
4236	*cp = ct_status_get_cookie(sh);
4237
4238	ct_status_free(sh);
4239	return (0);
4240}
4241
4242static void
4243contract_ack(ct_evthdl_t e)
4244{
4245	int fd;
4246
4247	if (ct_event_get_flags(e) & CTE_INFO)
4248		return;
4249
4250	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4251	if (fd < 0)
4252		return;
4253
4254	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4255	(void) close(fd);
4256}
4257
4258/*
4259 * Process a contract event.
4260 */
4261static void
4262contract_event(struct pollfd *poll)
4263{
4264	ct_evthdl_t e;
4265	int err;
4266	ctid_t ctid;
4267
4268	if (!(poll->revents & POLLIN)) {
4269		if (poll->revents & POLLERR)
4270			console(B_TRUE,
4271			    "Unknown poll error on my process contract "
4272			    "pbundle.\n");
4273		return;
4274	}
4275
4276	err = ct_event_read(poll->fd, &e);
4277	if (err != 0) {
4278		console(B_TRUE, "Error retrieving contract event: %s.\n",
4279		    strerror(err));
4280		return;
4281	}
4282
4283	ctid = ct_event_get_ctid(e);
4284
4285	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4286		uint64_t cookie;
4287		int ret, abandon = 1;
4288
4289		/* If it's svc.startd, restart it.  Else, abandon. */
4290		ret = contract_cookie(ctid, &cookie);
4291
4292		if (ret == 0) {
4293			if (cookie == STARTD_COOKIE &&
4294			    do_restart_startd) {
4295				if (smf_debug)
4296					console(B_TRUE, "Restarting "
4297					    "svc.startd.\n");
4298
4299				/*
4300				 * Account for the failure.  If the failure rate
4301				 * exceeds a threshold, then drop to maintenance
4302				 * mode.
4303				 */
4304				startd_record_failure();
4305				if (startd_failure_rate_critical())
4306					enter_maintenance();
4307
4308				if (startd_tmpl < 0)
4309					console(B_TRUE,
4310					    "Restarting svc.startd in "
4311					    "improper contract (bad "
4312					    "template).\n");
4313
4314				(void) startd_run(startd_cline, startd_tmpl,
4315				    ctid);
4316
4317				abandon = 0;
4318			}
4319		}
4320
4321		if (abandon && (err = contract_abandon_id(ctid))) {
4322			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4323			    ctid, strerror(err));
4324		}
4325
4326		/*
4327		 * No need to acknowledge the event since either way the
4328		 * originating contract should be abandoned.
4329		 */
4330	} else {
4331		console(B_TRUE,
4332		    "Received contract event of unexpected type %d from "
4333		    "contract %ld.\n", ct_event_get_type(e), ctid);
4334
4335		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4336			/* Allow unexpected critical events to be released. */
4337			contract_ack(e);
4338	}
4339
4340	ct_event_free(e);
4341}
4342
4343/*
4344 * svc.startd(1M) Management
4345 */
4346
4347/*
4348 * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4349 * contract, or 0 if we're starting it for the first time.  If wait is true
4350 * we'll wait for and return the exit value of the child.
4351 */
4352static int
4353startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4354{
4355	int err, i, ret, did_activate;
4356	pid_t pid;
4357	struct stat sb;
4358
4359	if (cline[0] == '\0')
4360		return (-1);
4361
4362	/*
4363	 * Don't restart startd if the system is rebooting or shutting down.
4364	 */
4365	do {
4366		ret = stat("/etc/svc/volatile/resetting", &sb);
4367	} while (ret == -1 && errno == EINTR);
4368
4369	if (ret == 0) {
4370		if (smf_debug)
4371			console(B_TRUE, "Quiescing for reboot.\n");
4372		(void) pause();
4373		return (-1);
4374	}
4375
4376	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4377	if (err == EINVAL) {
4378		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4379		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4380		    CT_PR_EV_HWERR, STARTD_COOKIE);
4381
4382		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4383	}
4384	if (err != 0) {
4385		console(B_TRUE,
4386		    "Couldn't set transfer parameter of contract template: "
4387		    "%s.\n", strerror(err));
4388	}
4389
4390	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4391	    SCF_SERVICE_STARTD)) != 0)
4392		console(B_TRUE,
4393		    "Can not set svc_fmri in contract template: %s\n",
4394		    strerror(err));
4395	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4396	    startd_svc_aux)) != 0)
4397		console(B_TRUE,
4398		    "Can not set svc_aux in contract template: %s\n",
4399		    strerror(err));
4400	did_activate = !(ct_tmpl_activate(tmpl));
4401	if (!did_activate)
4402		console(B_TRUE,
4403		    "Template activation failed; not starting \"%s\" in "
4404		    "proper contract.\n", cline);
4405
4406	/* Hold SIGCLD so we can wait if necessary. */
4407	(void) sighold(SIGCLD);
4408
4409	while ((pid = fork()) < 0) {
4410		if (errno == EPERM) {
4411			console(B_TRUE, "Insufficient permission to fork.\n");
4412
4413			/* Now that's a doozy. */
4414			exit(1);
4415		}
4416
4417		console(B_TRUE,
4418		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4419		    "second...\n", strerror(errno));
4420
4421		(void) sleep(1);
4422
4423		/* Eventually give up? */
4424	}
4425
4426	if (pid == 0) {
4427		/* child */
4428
4429		/* See the comment in efork() */
4430		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4431			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4432				(void) sigset(i, SIG_IGN);
4433			else
4434				(void) sigset(i, SIG_DFL);
4435		}
4436
4437		if (smf_options != NULL) {
4438			/* Put smf_options in the environment. */
4439			glob_envp[glob_envn] =
4440			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4441			    strlen(smf_options) + 1);
4442
4443			if (glob_envp[glob_envn] != NULL) {
4444				/* LINTED */
4445				(void) sprintf(glob_envp[glob_envn],
4446				    "SMF_OPTIONS=%s", smf_options);
4447				glob_envp[glob_envn+1] = NULL;
4448			} else {
4449				console(B_TRUE,
4450				    "Could not set SMF_OPTIONS (%s).\n",
4451				    strerror(errno));
4452			}
4453		}
4454
4455		if (smf_debug)
4456			console(B_TRUE, "Executing svc.startd\n");
4457
4458		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4459
4460		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4461		    strerror(errno));
4462
4463		exit(1);
4464	}
4465
4466	/* parent */
4467
4468	if (did_activate) {
4469		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4470			(void) ct_tmpl_clear(tmpl);
4471	}
4472
4473	/* Clear the old_ctid reference so the kernel can reclaim it. */
4474	if (old_ctid != 0)
4475		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4476
4477	(void) sigrelse(SIGCLD);
4478
4479	return (0);
4480}
4481
4482/*
4483 * void startd_record_failure(void)
4484 *   Place the current time in our circular array of svc.startd failures.
4485 */
4486void
4487startd_record_failure()
4488{
4489	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4490
4491	startd_failure_time[index] = gethrtime();
4492}
4493
4494/*
4495 * int startd_failure_rate_critical(void)
4496 *   Return true if the average failure interval is less than the permitted
4497 *   interval.  Implicit success if insufficient measurements for an average
4498 *   exist.
4499 */
4500int
4501startd_failure_rate_critical()
4502{
4503	int n = startd_failure_index;
4504	hrtime_t avg_ns = 0;
4505
4506	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4507		return (0);
4508
4509	avg_ns =
4510	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4511	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4512	    NSTARTD_FAILURE_TIMES;
4513
4514	return (avg_ns < STARTD_FAILURE_RATE_NS);
4515}
4516
4517/*
4518 * returns string that must be free'd
4519 */
4520
4521static char
4522*audit_boot_msg()
4523{
4524	char		*b, *p;
4525	char		desc[] = "booted";
4526	zoneid_t	zid = getzoneid();
4527
4528	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4529	if (b == NULL)
4530		return (b);
4531
4532	p = b;
4533	p += strlcpy(p, desc, sizeof (desc));
4534	if (zid != GLOBAL_ZONEID) {
4535		p += strlcpy(p, ": ", 3);
4536		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4537	}
4538	return (b);
4539}
4540
4541/*
4542 * Generate AUE_init_solaris audit record.  Return 1 if
4543 * auditing is enabled in case the caller cares.
4544 *
4545 * In the case of userint() or a local zone invocation of
4546 * one_true_init, the process initially contains the audit
4547 * characteristics of the process that invoked init.  The first pass
4548 * through here uses those characteristics then for the case of
4549 * one_true_init in a local zone, clears them so subsequent system
4550 * state changes won't be attributed to the person who booted the
4551 * zone.
4552 */
4553static int
4554audit_put_record(int pass_fail, int status, char *msg)
4555{
4556	adt_session_data_t	*ah;
4557	adt_event_data_t	*event;
4558
4559	if (!adt_audit_enabled())
4560		return (0);
4561
4562	/*
4563	 * the PROC_DATA picks up the context to tell whether this is
4564	 * an attributed record (auid = -2 is unattributed)
4565	 */
4566	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4567		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4568		return (1);
4569	}
4570	event = adt_alloc_event(ah, ADT_init_solaris);
4571	if (event == NULL) {
4572		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4573		(void) adt_end_session(ah);
4574		return (1);
4575	}
4576	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4577
4578	if (adt_put_event(event, pass_fail, status)) {
4579		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4580		(void) adt_end_session(ah);
4581		return (1);
4582	}
4583	adt_free_event(event);
4584
4585	(void) adt_end_session(ah);
4586
4587	return (1);
4588}
4589