init.c revision 6073:47f6aa7a8077
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30/*
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
33 * All Rights Reserved
34 *
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
37 * contributors.
38 */
39
40#pragma ident	"%Z%%M%	%I%	%E% SMI"
41
42/*
43 * init(1M) is the general process spawning program.  Its primary job is to
44 * start and restart svc.startd for smf(5).  For backwards-compatibility it also
45 * spawns and respawns processes according to /etc/inittab and the current
46 * run-level.  It reads /etc/default/inittab for general configuration.
47 *
48 * To change run-levels the system administrator runs init from the command
49 * line with a level name.  init signals svc.startd via libscf and directs the
50 * zone's init (pid 1 in the global zone) what to do by sending it a signal;
 * these signal numbers are commonly referred to in the code as 'states'.  Valid
52 * run-levels are [sS0123456].  Additionally, init can be given directives
53 * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
54 *
55 * When init processes inittab entries, it finds processes that are to be
56 * spawned at various run-levels.  inittab contains the set of the levels for
57 * which each inittab entry is valid.
58 *
59 * State File and Restartability
60 *   Premature exit by init(1M) is handled as a special case by the kernel:
61 *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
62 *   1 in the global zone.)  To track the processes it has previously spawned,
63 *   as well as other mutable state, init(1M) regularly updates a state file
64 *   such that its subsequent invocations have knowledge of its various
65 *   dependent processes and duties.
66 *
67 * Process Contracts
68 *   We start svc.startd(1M) in a contract and transfer inherited contracts when
69 *   restarting it.  Everything else is started using the legacy contract
70 *   template, and the created contracts are abandoned when they become empty.
71 *
72 * utmpx Entry Handling
73 *   Because init(1M) no longer governs the startup process, its knowledge of
74 *   when utmpx becomes writable is indirect.  However, spawned processes
75 *   expect to be constructed with valid utmpx entries.  As a result, attempts
76 *   to write normal entries will be retried until successful.
77 *
78 * Maintenance Mode
79 *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
80 *   which it invokes sulogin(1M) to allow the operator an opportunity to
81 *   repair the system.  Normally, this operation is performed as a
82 *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
83 *   diagnosis to be completed.  In the cases that fork(2) requests themselves
84 *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
85 *   restart init(1M) on exit from the operator session.
86 *
87 *   One scenario where init(1M) enters its maintenance mode is when
88 *   svc.startd(1M) begins to fail rapidly, defined as when the average time
89 *   between recent failures drops below a given threshold.
90 */
91
92#include <sys/contract/process.h>
93#include <sys/ctfs.h>
94#include <sys/stat.h>
95#include <sys/statvfs.h>
96#include <sys/stropts.h>
97#include <sys/systeminfo.h>
98#include <sys/time.h>
99#include <sys/termios.h>
100#include <sys/tty.h>
101#include <sys/types.h>
102#include <sys/utsname.h>
103
104#include <bsm/adt_event.h>
105#include <bsm/libbsm.h>
106#include <security/pam_appl.h>
107
108#include <assert.h>
109#include <ctype.h>
110#include <dirent.h>
111#include <errno.h>
112#include <fcntl.h>
113#include <libcontract.h>
114#include <libcontract_priv.h>
115#include <libintl.h>
116#include <libscf.h>
117#include <libscf_priv.h>
118#include <poll.h>
119#include <procfs.h>
120#include <signal.h>
121#include <stdarg.h>
122#include <stdio.h>
123#include <stdio_ext.h>
124#include <stdlib.h>
125#include <string.h>
126#include <strings.h>
127#include <syslog.h>
128#include <time.h>
129#include <ulimit.h>
130#include <unistd.h>
131#include <utmpx.h>
132#include <wait.h>
133#include <zone.h>
134#include <ucontext.h>
135
#undef	sleep

/*
 * fioctl() issues an ioctl(2) against the file descriptor that underlies
 * the stdio stream 'fp'.
 */
#define	fioctl(fp, request, arg)	ioctl(fileno(fp), (request), (arg))
#define	min(a, b)		(((a) < (b)) ? (a) : (b))

#define	TRUE	1
#define	FALSE	0
#define	FAILURE	(-1)	/* parenthesized so it is safe inside expressions */

#define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */

/*
 * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
 *		nothing else requires this "init" wakeup.
 */
#define	SLEEPTIME	(5 * 60)

/*
 * MAXCMDL	The maximum length of a command string in inittab.
 */
#define	MAXCMDL	512

/*
 * EXEC		The length of the prefix string added to all commands
 *		found in inittab.
 */
#define	EXEC	(sizeof ("exec ") - 1)

/*
 * TWARN	The amount of time between warning signal, SIGTERM,
 *		and the fatal kill signal, SIGKILL.
 */
#define	TWARN	5

/*
 * id_eq()	Compare two four-character inittab ids; evaluates to TRUE when
 *		all four characters match and FALSE otherwise.  The ids are
 *		not NUL-terminated, so exactly four characters are compared.
 *		Arguments are parenthesized so that expressions such as
 *		"p + 1" may be passed safely; each argument is evaluated
 *		more than once.
 */
#define	id_eq(x, y)	(((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
			(x)[2] == (y)[2] && (x)[3] == (y)[3]) ? TRUE : FALSE)

/*
 * The kernel's default umask is 022 these days; since some processes inherit
 * their umask from init, init will set it from CMASK in /etc/default/init.
 * init gets the default umask from the kernel; it sets the umask to 022
 * whenever it wants to create a file and reverts to CMASK afterwards.
 */

static int cmask;
181
/*
 * The following definitions, concluding with the 'lvls' array, provide a
 * common mapping between level-name (like 'S'), signal number (state),
 * run-level mask, and specific properties associated with a run-level.
 * This array should be accessed using the routines lvlname_to_state(),
 * lvlname_to_mask(), state_to_mask(), and state_to_flags().
 */

/*
 * Correspondence of signals to init actions.  The signal number doubles as
 * the 'state' value used throughout this file.
 */
#define	LVLQ		SIGHUP
#define	LVL0		SIGINT
#define	LVL1		SIGQUIT
#define	LVL2		SIGILL
#define	LVL3		SIGTRAP
#define	LVL4		SIGIOT
#define	LVL5		SIGEMT
#define	LVL6		SIGFPE
#define	SINGLE_USER	SIGBUS
#define	LVLa		SIGSEGV
#define	LVLb		SIGSYS
#define	LVLc		SIGPIPE

/*
 * Bit Mask for each level.  Used to determine legal levels.
 */
#define	MASK0	0x0001
#define	MASK1	0x0002
#define	MASK2	0x0004
#define	MASK3	0x0008
#define	MASK4	0x0010
#define	MASK5	0x0020
#define	MASK6	0x0040
#define	MASKSU	0x0080
#define	MASKa	0x0100
#define	MASKb	0x0200
#define	MASKc	0x0400

/* Convenience unions: all numbered run-levels, and all demand levels. */
#define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
#define	MASK_abc (MASKa | MASKb | MASKc)

/*
 * Flags to indicate properties of various states.
 */
#define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */

/* One row of the level-name / state / mask / flags mapping. */
typedef struct lvl {
	int	lvl_state;	/* signal number (one of the LVL* values) */
	int	lvl_mask;	/* MASK* bit for this level, 0 if none */
	char	lvl_name;	/* level name as typed by the administrator */
	int	lvl_flags;	/* LSEL_* properties */
} lvl_t;

/*
 * The mapping table itself.  Upper- and lower-case spellings of the same
 * level ('Q'/'q', 'S'/'s') appear as separate rows sharing one state.
 */
static lvl_t lvls[] = {
	{ LVLQ,		0,	'Q', 0					},
	{ LVLQ,		0,	'q', 0					},
	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL			},
	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL			},
	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL			},
	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL			},
	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL			},
	{ LVLa,		MASKa,	'a', 0					},
	{ LVLb,		MASKb,	'b', 0					},
	{ LVLc,		MASKc,	'c', 0					}
};

/* Number of rows in lvls[]. */
#define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
254
/*
 * Legal action field values.
 */
#define	OFF		0	/* Kill process if on, else ignore */
#define	RESPAWN		1	/* Continuously restart process when it dies */
#define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
#define	ONCE		2	/* Start process, do not respawn when dead */
#define	WAIT		3	/* Perform once and wait to complete */
#define	BOOT		4	/* Start at boot time only */
#define	BOOTWAIT	5	/* Start at boot time and wait to complete */
#define	POWERFAIL	6	/* Start on powerfail */
#define	POWERWAIT	7	/* Start and wait for complete on powerfail */
#define	INITDEFAULT	8	/* Default level "init" should start at */
#define	SYSINIT		9	/* Actions performed before init speaks */

/*
 * Bit-mask form of the action values above (octal); the mask for action
 * number N is 1 << N, so sets of actions can be combined with bitwise OR.
 */
#define	M_OFF		0001
#define	M_RESPAWN	0002
#define	M_ONDEMAND	M_RESPAWN
#define	M_ONCE		0004
#define	M_WAIT		0010
#define	M_BOOT		0020
#define	M_BOOTWAIT	0040
#define	M_PF		0100
#define	M_PWAIT		0200
#define	M_INITDEFAULT	0400
#define	M_SYSINIT	01000

/* States for the inittab parser in getcmd(). */
#define	ID	1
#define	LEVELS	2
#define	ACTION	3
#define	COMMAND	4
#define	COMMENT	5

/*
 * inittab entry id constants
 */
#define	INITTAB_ENTRY_ID_SIZE 4
#define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
						/* changes, this should */
						/* change accordingly */

/*
 * Init can be in any of three main states, "normal" mode where it is
 * processing entries for the lines file in a normal fashion, "boot" mode,
 * where it is only interested in the boot actions, and "powerfail" mode,
 * where it is only interested in powerfail related actions. The following
 * masks declare the legal actions for each mode.
 */
#define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
#define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
#define	PF_MODES	(M_PF | M_PWAIT)
307
/* One slot of init's process table. */
struct PROC_TABLE {
	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
						/* process */
	pid_t	p_pid;		/* Process id */
	short	p_count;	/* How many respawns of this command in */
				/*   the current series */
	long	p_time;		/* Start time for a series of respawns */
	short	p_flags;	/* Bitwise or of the flags described below */
	short	p_exit;		/* Exit status of a process which died */
};

/*
 * Flags for the "p_flags" word of a PROC_TABLE entry:
 *
 *	OCCUPIED	This slot in init's proc table is in use.
 *
 *	LIVING		Process is alive.
 *
 *	NOCLEANUP	efork() is not allowed to cleanup this entry even
 *			if process is dead.
 *
 *	NAMED		This process has a name, i.e. came from inittab.
 *
 *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
 *			formed this way are respawnable and immune to level
 *			changes as long as their entry exists in inittab.
 *
 *	TOUCHED		Flag used by remv() to determine whether it has looked
 *			at an entry while checking for processes to be killed.
 *
 *	WARNED		Flag used by remv() to mark processes that have been
 *			sent the SIGTERM signal.  If they don't die in 5
 *			seconds, they are sent the SIGKILL signal.
 *
 *	KILLED		Flag used by remv() to mark procs that have been sent
 *			the SIGTERM and SIGKILL signals.
 *
 *	PF_MASK		Bitwise or of legal flags, for sanity checking.
 */
#define	OCCUPIED	01
#define	LIVING		02
#define	NOCLEANUP	04
#define	NAMED		010
#define	DEMANDREQUEST	020
#define	TOUCHED		040
#define	WARNED		0100
#define	KILLED		0200
#define	PF_MASK		0377

/*
 * Respawn limits for processes that are to be respawned:
 *
 *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
 *			respawn a process SPAWN_LIMIT times before it gets mad.
 *
 *	SPAWN_LIMIT	The number of respawns "init" will attempt in
 *			SPAWN_INTERVAL seconds before it generates an
 *			error message and inhibits further tries for
 *			INHIBIT seconds.
 *
 *	INHIBIT		The number of seconds "init" ignores an entry it had
 *			trouble spawning unless a "telinit Q" is received.
 */

#define	SPAWN_INTERVAL	(2*60)
#define	SPAWN_LIMIT	10
#define	INHIBIT		(5*60)

/*
 * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
 */
#define	ID_MAX_STR_LEN	10

/*
 * Sentinel PROC_TABLE pointers.  enter_maintenance() compares the result of
 * efork() against both: NO_ROOM means retry, NULLPROC selects the branch
 * that executes sulogin.
 */
#define	NULLPROC	((struct PROC_TABLE *)(0))
#define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
383
/* One parsed inittab entry, as filled in by getcmd(). */
struct CMD_LINE {
	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
						/* process to be affected by */
						/* action */
	short c_levels;	/* Mask of legal levels for process */
	short c_action;	/* Mask for type of action required */
	char *c_command; /* Pointer to init command */
};

/* A request to start or stop watching a pid. */
struct	pidrec {
	int	pd_type;	/* Command type */
	pid_t	pd_pid;		/* pid to add or remove */
};

/*
 * pd_type's
 */
#define	ADDPID	1
#define	REMPID	2

/*
 * Singly linked list node for a watched pid.  Plhead and Plfree are the two
 * list heads declared with the type.
 * NOTE(review): presumably Plhead is the active list and Plfree a free
 * list -- confirm against the list-handling code.
 */
static struct	pidlist {
	pid_t	pl_pid;		/* pid to watch for */
	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
	short	pl_exit;	/* Exit status of proc */
	struct	pidlist	*pl_next; /* Next in list */
} *Plhead, *Plfree;
410
/*
 * The following structure contains a set of modes for /dev/syscon
 * and should match the default contents of /etc/ioctl.syscon.
 */
static struct termios	dflt_termios = {
	BRKINT|ICRNL|IXON|IMAXBEL,			/* iflag */
	OPOST|ONLCR|TAB3,				/* oflag */
	CS8|CREAD|B9600,				/* cflag */
	ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
	CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0
};

static struct termios	stored_syscon_termios;
static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */

/*
 * Reasons init has been woken up.  The individual reasons are one-bit
 * flags; w_mask overlays them so that main() can test for "no reason
 * pending" and clear every reason with a single assignment.
 */
static union WAKEUP {
	struct WAKEFLAGS {
		unsigned w_usersignal : 1;	/* User sent signal to "init" */
		unsigned w_childdeath : 1;	/* An "init" child died */
		unsigned w_powerhit : 1;	/* OS experienced powerfail */
	}	w_flags;
	int w_mask;
} wakeup;
436
437
/*
 * init's mutable state, reached through the global pointer g_state.
 * NOTE(review): ist_proc_table is declared with one element but indexed up
 * to ist_num_proc elsewhere, so the structure is presumably allocated
 * larger than its declared size -- confirm in st_init() and
 * increase_proc_table_size().
 */
struct init_state {
	int			ist_runlevel;
	int			ist_num_proc;
	int			ist_utmpx_ok;
	struct PROC_TABLE	ist_proc_table[1];
};

/* Shorthand accessors for the fields of *g_state. */
#define	cur_state	(g_state->ist_runlevel)
#define	num_proc	(g_state->ist_num_proc)
#define	proc_table	(g_state->ist_proc_table)
#define	utmpx_ok	(g_state->ist_utmpx_ok)

/* Contract cookies. */
#define	ORDINARY_COOKIE		0
#define	STARTD_COOKIE		1

454
/*
 * bad_error(func, err)
 *   Abort the process after a library call failed with an error code the
 *   caller did not expect.  In debug builds the failing function name and
 *   error number are reported on stderr first.
 *
 *   The debug variant is wrapped in do { } while (0) so that it expands to a
 *   single statement and can be used, followed by a semicolon, anywhere the
 *   NDEBUG variant can -- including as the body of an if that has an else.
 */
#ifndef NDEBUG
#define	bad_error(func, err)	do {					\
	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
	abort();							\
} while (0)
#else
#define	bad_error(func, err)	abort()
#endif
464
465
/*
 * Useful file and device names.
 */
static char *CONSOLE	  = "/dev/console";	/* Real system console */
static char *INITPIPE_DIR = "/var/run";		/* Directory of INITPIPE */
/* NOTE(review): presumably the pipe opened by setup_pipe() -- confirm. */
static char *INITPIPE	  = "/var/run/initpipe";

/*
 * Location of the state file described in the header comment under
 * "State File and Restartability".
 * NOTE(review): init_next_state_file looks like a staging name used while
 * writing a new state file -- confirm in st_write().
 */
#define	INIT_STATE_DIR "/etc/svc/volatile"
static const char * const init_state_file = INIT_STATE_DIR "/init.state";
static const char * const init_next_state_file =
	INIT_STATE_DIR "/init-next.state";

static const int init_num_proc = 20;	/* Initial size of process table. */

static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
static char *SYSTTY	 = "/dev/systty";	/* System Console */
static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
static char *SH	= "/sbin/sh";		/* Standard shell */
489
/*
 * Default Path.  /sbin is included in path only during sysinit phase
 */
#define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
#define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"

/*
 * Run-level bookkeeping.  main() rotates cur_state, prev_state and
 * prior_state when a level change or a demand request (a, b, c) arrives.
 */
static int	prior_state;
static int	prev_state;	/* State "init" was in last time it woke */
static int	new_state;	/* State user wants "init" to go to. */
static int	lvlq_received;	/* Explicit request to examine state */
static int	op_modes = BOOT_MODES; /* Current state of "init" */
static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
				/*   childeath() and cleared in cleanaux() */
static int	Pfd = -1;	/* fd to receive pids thru */
/* spawncnt and pausecnt are incremented in main()'s loop. */
static unsigned int	spawncnt, pausecnt;
static int	rsflag;		/* Set if a respawn has taken place */
static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
				/* routine each time an alarm interrupt */
				/* takes place. */
static int	sflg = 0;	/* Set if we were booted -s to single user */
static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
static pid_t	init_pid;	/* PID of "one true" init for current zone */

/* The state block behind the cur_state/num_proc/proc_table/utmpx_ok macros. */
static struct init_state *g_state = NULL;
static size_t	g_state_sz;
static int	booting = 1;	/* Set while we're booting. */
517
/*
 * Array for default global environment.
 */
#define	MAXENVENT	24	/* Max number of default env variables + 1 */
				/* init can use three itself, so this leaves */
				/* 20 for the administrator in ENVFILE. */
static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
static int	glob_envn;		/* Number of environment strings */


/* Descriptor set handed to poll() in main()'s idle wait. */
static struct pollfd	poll_fds[1];
static int		poll_nfds = 0;	/* poll_fds is uninitialized */

/*
 * Contracts constants
 *
 * SVC_AUX_SIZE leaves one byte beyond an inittab id.  SVC_FMRI_SIZE is the
 * size of the prefix string (sizeof counts its terminating NUL) plus an
 * inittab id.
 */
#define	SVC_INIT_PREFIX "init:/"
#define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
#define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)

static int	legacy_tmpl = -1;	/* fd for legacy contract template */
static int	startd_tmpl = -1;	/* fd for svc.startd's template */
static char	startd_svc_aux[SVC_AUX_SIZE];

static char	startd_cline[256] = "";	/* svc.startd's command line */
static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
static char	*smf_options = NULL;	/* Options to give to startd. */
static int	smf_debug = 0;		/* Messages for debugging smf(5) */
static time_t	init_boot_time;		/* Substitute for kernel boot time. */

/*
 * svc.startd failure-rate tracking; see "Maintenance Mode" in the header
 * comment.  The rate threshold is expressed in nanoseconds.
 */
#define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
#define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */

/* Timestamps of recent svc.startd failures, and the current index into them. */
static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
static uint_t	startd_failure_index;
553
554
/*
 * Forward declarations of the file-local routines.  Several are declared
 * with empty parentheses (no parameter information) rather than (void).
 */
static char	*prog_name(char *);
static int	state_to_mask(int);
static int	lvlname_to_mask(char, int *);
static void	lscf_set_runlevel(char);
static int	state_to_flags(int);
static char	state_to_name(int);
static int	lvlname_to_state(char);
static int	getcmd(struct CMD_LINE *, char *);
static int	realcon();
static int	spawn_processes();
static int	get_ioctl_syscon();
static int	account(short, struct PROC_TABLE *, char *);
static void	alarmclk();
static void	childeath(int);
static void	cleanaux();
static void	clearent(pid_t, short);
static void	console(boolean_t, char *, ...);
static void	init_signals(void);
static void	setup_pipe();
static void	killproc(pid_t);
static void	init_env();
static void	boot_init();
static void	powerfail();
static void	remv();
static void	write_ioctl_syscon();
static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
static void	setimer(int);
static void	siglvl(int, siginfo_t *, ucontext_t *);
static void	sigpoll(int);
static void	enter_maintenance(void);
static void	timer(int);
static void	userinit(int, char **);
static void	notify_pam_dead(struct utmpx *);
static long	waitproc(struct PROC_TABLE *);
static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
static struct PROC_TABLE *findpslot(struct CMD_LINE *);
static void	increase_proc_table_size();
static void	st_init();
static void	st_write();
static void	contracts_init();
static void	contract_event(struct pollfd *);
static int	startd_run(const char *, int, ctid_t);
static void	startd_record_failure();
static int	startd_failure_rate_critical();
static char	*audit_boot_msg();
static int	audit_put_record(int, int, char *);
static void	update_boot_archive(int new_state);
602
603int
604main(int argc, char *argv[])
605{
606	int	chg_lvl_flag = FALSE, print_banner = FALSE;
607	int	may_need_audit = 1;
608	int	c;
609	char	*msg;
610
611	/* Get a timestamp for use as boot time, if needed. */
612	(void) time(&init_boot_time);
613
614	/* Get the default umask */
615	cmask = umask(022);
616	(void) umask(cmask);
617
618	/* Parse the arguments to init. Check for single user */
619	opterr = 0;
620	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
621		switch (c) {
622		case 'b':
623			rflg = 0;
624			bflg = 1;
625			if (!sflg)
626				sflg++;
627			break;
628		case 'r':
629			bflg = 0;
630			rflg++;
631			break;
632		case 's':
633			if (!bflg)
634				sflg++;
635			break;
636		case 'm':
637			smf_options = optarg;
638			smf_debug = (strstr(smf_options, "debug") != NULL);
639			break;
640		}
641	}
642
643	/*
644	 * Determine if we are the main init, or a user invoked init, whose job
645	 * it is to inform init to change levels or perform some other action.
646	 */
647	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
648	    sizeof (init_pid)) != sizeof (init_pid)) {
649		(void) fprintf(stderr, "could not get pid for init\n");
650		return (1);
651	}
652
653	/*
654	 * If this PID is not the same as the "true" init for the zone, then we
655	 * must be in 'user' mode.
656	 */
657	if (getpid() != init_pid) {
658		userinit(argc, argv);
659	}
660
661	if (getzoneid() != GLOBAL_ZONEID) {
662		print_banner = TRUE;
663	}
664
665	/*
666	 * Initialize state (and set "booting").
667	 */
668	st_init();
669
670	if (booting && print_banner) {
671		struct utsname un;
672		char buf[BUFSIZ], *isa;
673		long ret;
674		int bits = 32;
675
676		/*
677		 * We want to print the boot banner as soon as
678		 * possible.  In the global zone, the kernel does it,
679		 * but we do not have that luxury in non-global zones,
680		 * so we will print it here.
681		 */
682		(void) uname(&un);
683		ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
684		if (ret != -1L && ret <= sizeof (buf)) {
685			for (isa = strtok(buf, " "); isa;
686			    isa = strtok(NULL, " ")) {
687				if (strcmp(isa, "sparcv9") == 0 ||
688				    strcmp(isa, "amd64") == 0) {
689					bits = 64;
690					break;
691				}
692			}
693		}
694
695		console(B_FALSE,
696		    "\n\n%s Release %s Version %s %d-bit\r\n",
697		    un.sysname, un.release, un.version, bits);
698		console(B_FALSE,
699		    "Copyright 1983-2008 Sun Microsystems, Inc. "
700		    " All rights reserved.\r\n");
701		console(B_FALSE,
702		    "Use is subject to license terms.\r\n");
703	}
704
705	/*
706	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
707	 * so that it can be brought up in the state it was in when the
708	 * system went down; or set to defaults if ioctl.syscon isn't
709	 * valid.
710	 *
711	 * This needs to be done even if we're restarting so reset_modes()
712	 * will work in case we need to go down to single user mode.
713	 */
714	write_ioctl = get_ioctl_syscon();
715
716	/*
717	 * Set up all signals to be caught or ignored as appropriate.
718	 */
719	init_signals();
720
721	/* Load glob_envp from ENVFILE. */
722	init_env();
723
724	contracts_init();
725
726	if (!booting) {
727		/* cur_state should have been read in. */
728
729		op_modes = NORMAL_MODES;
730
731		/* Rewrite the ioctl file if it was bad. */
732		if (write_ioctl)
733			write_ioctl_syscon();
734	} else {
735		/*
736		 * It's fine to boot up with state as zero, because
737		 * startd will later tell us the real state.
738		 */
739		cur_state = 0;
740		op_modes = BOOT_MODES;
741
742		boot_init();
743	}
744
745	prev_state = prior_state = cur_state;
746
747	setup_pipe();
748
749	/*
750	 * Here is the beginning of the main process loop.
751	 */
752	for (;;) {
753		if (lvlq_received) {
754			setup_pipe();
755			lvlq_received = B_FALSE;
756		}
757
758		/*
759		 * Clean up any accounting records for dead "godchildren".
760		 */
761		if (Gchild)
762			cleanaux();
763
764		/*
765		 * If in "normal" mode, check all living processes and initiate
766		 * kill sequence on those that should not be there anymore.
767		 */
768		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
769		    cur_state != LVLb && cur_state != LVLc)
770			remv();
771
772		/*
773		 * If a change in run levels is the reason we awoke, now do
774		 * the accounting to report the change in the utmp file.
775		 * Also report the change on the system console.
776		 */
777		if (chg_lvl_flag) {
778			chg_lvl_flag = FALSE;
779
780			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
781				char rl = state_to_name(cur_state);
782
783				if (rl != -1)
784					lscf_set_runlevel(rl);
785			}
786
787			may_need_audit = 1;
788		}
789
790		/*
791		 * Scan the inittab file and spawn and respawn processes that
792		 * should be alive in the current state. If inittab does not
793		 * exist default to  single user mode.
794		 */
795		if (spawn_processes() == FAILURE) {
796			prior_state = prev_state;
797			cur_state = SINGLE_USER;
798		}
799
800		/* If any respawns occurred, take note. */
801		if (rsflag) {
802			rsflag = 0;
803			spawncnt++;
804		}
805
806		/*
807		 * If a powerfail signal was received during the last
808		 * sequence, set mode to powerfail.  When spawn_processes() is
809		 * entered the first thing it does is to check "powerhit".  If
810		 * it is in PF_MODES then it clears "powerhit" and does
811		 * a powerfail sequence.  If it is not in PF_MODES, then it
812		 * puts itself in PF_MODES and then clears "powerhit".  Should
813		 * "powerhit" get set again while spawn_processes() is working
814		 * on a powerfail sequence, the following code  will see that
815		 * spawn_processes() tries to execute the powerfail sequence
816		 * again.  This guarantees that the powerfail sequence will be
817		 * successfully completed before further processing takes
818		 * place.
819		 */
820		if (wakeup.w_flags.w_powerhit) {
821			op_modes = PF_MODES;
822			/*
823			 * Make sure that cur_state != prev_state so that
824			 * ONCE and WAIT types work.
825			 */
826			prev_state = 0;
827		} else if (op_modes != NORMAL_MODES) {
828			/*
829			 * If spawn_processes() was not just called while in
830			 * normal mode, we set the mode to normal and it will
831			 * be called again to check normal modes.  If we have
832			 * just finished a powerfail sequence with prev_state
833			 * equal to zero, we set prev_state equal to cur_state
834			 * before the next pass through.
835			 */
836			if (op_modes == PF_MODES)
837				prev_state = cur_state;
838			op_modes = NORMAL_MODES;
839		} else if (cur_state == LVLa || cur_state == LVLb ||
840		    cur_state == LVLc) {
841			/*
842			 * If it was a change of levels that awakened us and the
843			 * new level is one of the demand levels then reset
844			 * cur_state to the previous state and do another scan
845			 * to take care of the usual respawn actions.
846			 */
847			cur_state = prior_state;
848			prior_state = prev_state;
849			prev_state = cur_state;
850		} else {
851			prev_state = cur_state;
852
853			if (wakeup.w_mask == 0) {
854				int ret;
855
856				if (may_need_audit && (cur_state == LVL3)) {
857					msg = audit_boot_msg();
858
859					may_need_audit = 0;
860					(void) audit_put_record(ADT_SUCCESS,
861					    ADT_SUCCESS, msg);
862					free(msg);
863				}
864
865				/*
866				 * "init" is finished with all actions for
867				 * the current wakeup.
868				 */
869				ret = poll(poll_fds, poll_nfds,
870				    SLEEPTIME * MILLISEC);
871				pausecnt++;
872				if (ret > 0)
873					contract_event(&poll_fds[0]);
874				else if (ret < 0 && errno != EINTR)
875					console(B_TRUE, "poll() error: %s\n",
876					    strerror(errno));
877			}
878
879			if (wakeup.w_flags.w_usersignal) {
880				/*
881				 * Install the new level.  This could be a real
882				 * change in levels  or a telinit [Q|a|b|c] or
883				 * just a telinit to the same level at which
884				 * we are running.
885				 */
886				if (new_state != cur_state) {
887					if (new_state == LVLa ||
888					    new_state == LVLb ||
889					    new_state == LVLc) {
890						prev_state = prior_state;
891						prior_state = cur_state;
892						cur_state = new_state;
893					} else {
894						prev_state = cur_state;
895						if (cur_state >= 0)
896							prior_state = cur_state;
897						cur_state = new_state;
898						chg_lvl_flag = TRUE;
899					}
900				}
901
902				new_state = 0;
903			}
904
905			if (wakeup.w_flags.w_powerhit)
906				op_modes = PF_MODES;
907
908			/*
909			 * Clear all wakeup reasons.
910			 */
911			wakeup.w_mask = 0;
912		}
913	}
914
915	/*NOTREACHED*/
916}
917
918static void
919update_boot_archive(int new_state)
920{
921	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
922		return;
923
924	if (getzoneid() != GLOBAL_ZONEID)
925		return;
926
927	(void) system("/sbin/bootadm -a update_all");
928}
929
930/*
931 * void enter_maintenance()
932 *   A simple invocation of sulogin(1M), with no baggage, in the case that we
933 *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
934 *   we wait for it to exit.
935 */
936static void
937enter_maintenance()
938{
939	struct PROC_TABLE	*su_process;
940
941	console(B_FALSE, "Requesting maintenance mode\n"
942	    "(See /lib/svc/share/README for additional information.)\n");
943	(void) sigset(SIGCLD, SIG_DFL);
944	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
945		(void) pause();
946	(void) sigset(SIGCLD, childeath);
947	if (su_process == NULLPROC) {
948		int fd;
949
950		(void) fclose(stdin);
951		(void) fclose(stdout);
952		(void) fclose(stderr);
953		closefrom(0);
954
955		fd = open(SYSCON, O_RDWR | O_NOCTTY);
956		if (fd >= 0) {
957			(void) dup2(fd, 1);
958			(void) dup2(fd, 2);
959		} else {
960			/*
961			 * Need to issue an error message somewhere.
962			 */
963			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
964			    getpid(), SYSCON, strerror(errno));
965		}
966
967		/*
968		 * Execute the "su" program.
969		 */
970		(void) execle(SU, SU, "-", (char *)0, glob_envp);
971		console(B_TRUE, "execle of %s failed: %s\n", SU,
972		    strerror(errno));
973		timer(5);
974		exit(1);
975	}
976
977	/*
978	 * If we are the parent, wait around for the child to die
979	 * or for "init" to be signaled to change levels.
980	 */
981	while (waitproc(su_process) == FAILURE) {
982		/*
983		 * All other reasons for waking are ignored when in
984		 * single-user mode.  The only child we are interested
985		 * in is being waited for explicitly by waitproc().
986		 */
987		wakeup.w_mask = 0;
988	}
989}
990
991/*
992 * remv() scans through "proc_table" and performs cleanup.  If
993 * there is a process in the table, which shouldn't be here at
994 * the current run level, then remv() kills the process.
995 */
996static void
997remv()
998{
999	struct PROC_TABLE	*process;
1000	struct CMD_LINE		cmd;
1001	char			cmd_string[MAXCMDL];
1002	int			change_level;
1003
1004	change_level = (cur_state != prev_state ? TRUE : FALSE);
1005
1006	/*
1007	 * Clear the TOUCHED flag on all entries so that when we have
1008	 * finished scanning inittab, we will be able to tell if we
1009	 * have any processes for which there is no entry in inittab.
1010	 */
1011	for (process = proc_table;
1012	    (process < proc_table + num_proc); process++) {
1013		process->p_flags &= ~TOUCHED;
1014	}
1015
1016	/*
1017	 * Scan all inittab entries.
1018	 */
1019	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
1020		/* Scan for process which goes with this entry in inittab. */
1021		for (process = proc_table;
1022		    (process < proc_table + num_proc); process++) {
1023			if ((process->p_flags & OCCUPIED) == 0 ||
1024			    !id_eq(process->p_id, cmd.c_id))
1025				continue;
1026
1027			/*
1028			 * This slot contains the process we are looking for.
1029			 */
1030
1031			/*
1032			 * Is the cur_state SINGLE_USER or is this process
1033			 * marked as "off" or was this proc started by some
1034			 * mechanism other than LVL{a|b|c} and the current level
1035			 * does not support this process?
1036			 */
1037			if (cur_state == SINGLE_USER ||
1038			    cmd.c_action == M_OFF ||
1039			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
1040			    (process->p_flags & DEMANDREQUEST) == 0)) {
1041				if (process->p_flags & LIVING) {
1042					/*
1043					 * Touch this entry so we know we have
1044					 * treated it.  Note that procs which
1045					 * are already dead at this point and
1046					 * should not be restarted are left
1047					 * untouched.  This causes their slot to
1048					 * be freed later after dead accounting
1049					 * is done.
1050					 */
1051					process->p_flags |= TOUCHED;
1052
1053					if ((process->p_flags & KILLED) == 0) {
1054						if (change_level) {
1055							process->p_flags
1056							    |= WARNED;
1057							(void) kill(
1058							    process->p_pid,
1059							    SIGTERM);
1060						} else {
1061							/*
1062							 * Fork a killing proc
1063							 * so "init" can
1064							 * continue without
1065							 * having to pause for
1066							 * TWARN seconds.
1067							 */
1068							killproc(
1069							    process->p_pid);
1070						}
1071						process->p_flags |= KILLED;
1072					}
1073				}
1074			} else {
1075				/*
1076				 * Process can exist at current level.  If it is
1077				 * still alive or a DEMANDREQUEST we touch it so
1078				 * it will be left alone.  Otherwise we leave it
1079				 * untouched so it will be accounted for and
1080				 * cleaned up later in remv().  Dead
1081				 * DEMANDREQUESTs will be accounted but not
1082				 * freed.
1083				 */
1084				if (process->p_flags &
1085				    (LIVING|NOCLEANUP|DEMANDREQUEST))
1086					process->p_flags |= TOUCHED;
1087			}
1088
1089			break;
1090		}
1091	}
1092
1093	st_write();
1094
1095	/*
1096	 * If this was a change of levels call, scan through the
1097	 * process table for processes that were warned to die.  If any
1098	 * are found that haven't left yet, sleep for TWARN seconds and
1099	 * then send final terminations to any that haven't died yet.
1100	 */
1101	if (change_level) {
1102
1103		/*
1104		 * Set the alarm for TWARN seconds on the assumption
1105		 * that there will be some that need to be waited for.
1106		 * This won't harm anything except we are guaranteed to
1107		 * wakeup in TWARN seconds whether we need to or not.
1108		 */
1109		setimer(TWARN);
1110
1111		/*
1112		 * Scan for processes which should be dying.  We hope they
1113		 * will die without having to be sent a SIGKILL signal.
1114		 */
1115		for (process = proc_table;
1116		    (process < proc_table + num_proc); process++) {
1117			/*
1118			 * If this process should die, hasn't yet, and the
1119			 * TWARN time hasn't expired yet, wait for process
1120			 * to die or for timer to expire.
1121			 */
1122			while (time_up == FALSE &&
1123			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
1124			    (WARNED|LIVING|OCCUPIED))
1125				(void) pause();
1126
1127			if (time_up == TRUE)
1128				break;
1129		}
1130
1131		/*
1132		 * If we reached the end of the table without the timer
1133		 * expiring, then there are no procs which will have to be
1134		 * sent the SIGKILL signal.  If the timer has expired, then
1135		 * it is necessary to scan the table again and send signals
1136		 * to all processes which aren't going away nicely.
1137		 */
1138		if (time_up == TRUE) {
1139			for (process = proc_table;
1140			    (process < proc_table + num_proc); process++) {
1141				if ((process->p_flags &
1142				    (WARNED|LIVING|OCCUPIED)) ==
1143				    (WARNED|LIVING|OCCUPIED))
1144					(void) kill(process->p_pid, SIGKILL);
1145			}
1146		}
1147		setimer(0);
1148	}
1149
1150	/*
1151	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
1152	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
1153	 * by the above scanning), and haven't been sent kill signals, and
1154	 * those entries marked not LIVING, NAMED.  The former procs are killed.
1155	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
1156	 */
1157	for (process = proc_table;
1158	    (process < proc_table + num_proc); process++) {
1159		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
1160		    == (LIVING|NAMED|OCCUPIED)) {
1161			killproc(process->p_pid);
1162			process->p_flags |= KILLED;
1163		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
1164		    (NAMED|OCCUPIED)) {
1165			(void) account(DEAD_PROCESS, process, NULL);
1166			/*
1167			 * If this named proc hasn't been TOUCHED, then free the
1168			 * space. It has either died of it's own accord, but
1169			 * isn't respawnable or it was killed because it
1170			 * shouldn't exist at this level.
1171			 */
1172			if ((process->p_flags & TOUCHED) == 0)
1173				process->p_flags = 0;
1174		}
1175	}
1176
1177	st_write();
1178}
1179
1180/*
1181 * Extract the svc.startd command line and whether to restart it from its
1182 * inittab entry.
1183 */
1184/*ARGSUSED*/
1185static void
1186process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
1187{
1188	size_t sz;
1189
1190	/* Save the command line. */
1191	if (sflg || rflg) {
1192		/* Also append -r or -s. */
1193		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1194		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
1195		if (sflg)
1196			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
1197		if (rflg)
1198			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
1199	} else {
1200		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
1201	}
1202
1203	if (sz >= sizeof (startd_cline)) {
1204		console(B_TRUE,
1205		    "svc.startd command line too long.  Ignoring.\n");
1206		startd_cline[0] = '\0';
1207		return;
1208	}
1209}
1210
1211/*
1212 * spawn_processes() scans inittab for entries which should be run at this
1213 * mode.  Processes which should be running but are not, are started.
1214 */
1215static int
1216spawn_processes()
1217{
1218	struct PROC_TABLE		*pp;
1219	struct CMD_LINE			cmd;
1220	char				cmd_string[MAXCMDL];
1221	short				lvl_mask;
1222	int				status;
1223
1224	/*
1225	 * First check the "powerhit" flag.  If it is set, make sure the modes
1226	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
1227	 * on the "powerhit" flag by disallowing a new powerfail interrupt
1228	 * between the test of the powerhit flag and the clearing of it.
1229	 */
1230	if (wakeup.w_flags.w_powerhit) {
1231		wakeup.w_flags.w_powerhit = 0;
1232		op_modes = PF_MODES;
1233	}
1234	lvl_mask = state_to_mask(cur_state);
1235
1236	/*
1237	 * Scan through all the entries in inittab.
1238	 */
1239	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
1240		if (id_eq(cmd.c_id, "smf")) {
1241			process_startd_line(&cmd, cmd_string);
1242			continue;
1243		}
1244
1245retry_for_proc_slot:
1246
1247		/*
1248		 * Find out if there is a process slot for this entry already.
1249		 */
1250		if ((pp = findpslot(&cmd)) == NULLPROC) {
1251			/*
1252			 * we've run out of proc table entries
1253			 * increase proc_table.
1254			 */
1255			increase_proc_table_size();
1256
1257			/*
1258			 * Retry now as we have an empty proc slot.
1259			 * In case increase_proc_table_size() fails,
1260			 * we will keep retrying.
1261			 */
1262			goto retry_for_proc_slot;
1263		}
1264
1265		/*
1266		 * If there is an entry, and it is marked as DEMANDREQUEST,
1267		 * one of the levels a, b, or c is in its levels mask, and
1268		 * the action field is ONDEMAND and ONDEMAND is a permissable
1269		 * mode, and the process is dead, then respawn it.
1270		 */
1271		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
1272		    (cmd.c_levels & MASK_abc) &&
1273		    (cmd.c_action & op_modes) == M_ONDEMAND) {
1274			spawn(pp, &cmd);
1275			continue;
1276		}
1277
1278		/*
1279		 * If the action is not an action we are interested in,
1280		 * skip the entry.
1281		 */
1282		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
1283		    (cmd.c_levels & lvl_mask) == 0)
1284			continue;
1285
1286		/*
1287		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
1288		 * ONDEMAND) and the action field is either OFF or the action
1289		 * field is ONCE or WAIT and the current level is the same as
1290		 * the last level, then skip this entry.  ONCE and WAIT only
1291		 * get run when the level changes.
1292		 */
1293		if (op_modes == NORMAL_MODES &&
1294		    (cmd.c_action == M_OFF ||
1295			(cmd.c_action & (M_ONCE|M_WAIT)) &&
1296			cur_state == prev_state))
1297			continue;
1298
1299		/*
1300		 * At this point we are interested in performing the action for
1301		 * this entry.  Actions fall into two categories, spinning off
1302		 * a process and not waiting, and spinning off a process and
1303		 * waiting for it to die.  If the action is ONCE, RESPAWN,
1304		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
1305		 * to die, for all other actions we do wait.
1306		 */
1307		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
1308			spawn(pp, &cmd);
1309
1310		} else {
1311			spawn(pp, &cmd);
1312			while (waitproc(pp) == FAILURE);
1313			(void) account(DEAD_PROCESS, pp, NULL);
1314			pp->p_flags = 0;
1315		}
1316	}
1317	return (status);
1318}
1319
1320/*
1321 * spawn() spawns a shell, inserts the information about the process
1322 * process into the proc_table, and does the startup accounting.
1323 */
1324static void
1325spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
1326{
1327	int		i;
1328	int		modes, maxfiles;
1329	time_t		now;
1330	struct PROC_TABLE tmproc, *oprocess;
1331
1332	/*
1333	 * The modes to be sent to efork() are 0 unless we are
1334	 * spawning a LVLa, LVLb, or LVLc entry or we will be
1335	 * waiting for the death of the child before continuing.
1336	 */
1337	modes = NAMED;
1338	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
1339	    cur_state == LVLb || cur_state == LVLc)
1340		modes |= DEMANDREQUEST;
1341	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
1342		modes |= NOCLEANUP;
1343
1344	/*
1345	 * If this is a respawnable process, check the threshold
1346	 * information to avoid excessive respawns.
1347	 */
1348	if (cmd->c_action & M_RESPAWN) {
1349		/*
1350		 * Add NOCLEANUP to all respawnable commands so that the
1351		 * information about the frequency of respawns isn't lost.
1352		 */
1353		modes |= NOCLEANUP;
1354		(void) time(&now);
1355
1356		/*
1357		 * If no time is assigned, then this is the first time
1358		 * this command is being processed in this series.  Assign
1359		 * the current time.
1360		 */
1361		if (process->p_time == 0L)
1362			process->p_time = now;
1363
1364		if (process->p_count++ == SPAWN_LIMIT) {
1365
1366			if ((now - process->p_time) < SPAWN_INTERVAL) {
1367				/*
1368				 * Process is respawning too rapidly.  Print
1369				 * message and refuse to respawn it for now.
1370				 */
1371				console(B_TRUE, "Command is respawning too "
1372				    "rapidly. Check for possible errors.\n"
1373				    "id:%4s \"%s\"\n",
1374				    &cmd->c_id[0], &cmd->c_command[EXEC]);
1375				return;
1376			}
1377			process->p_time = now;
1378			process->p_count = 0;
1379
1380		} else if (process->p_count > SPAWN_LIMIT) {
1381			/*
1382			 * If process has been respawning too rapidly and
1383			 * the inhibit time limit hasn't expired yet, we
1384			 * refuse to respawn.
1385			 */
1386			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
1387				return;
1388			process->p_time = now;
1389			process->p_count = 0;
1390		}
1391		rsflag = TRUE;
1392	}
1393
1394	/*
1395	 * Spawn a child process to execute this command.
1396	 */
1397	(void) sigset(SIGCLD, SIG_DFL);
1398	oprocess = process;
1399	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
1400		(void) pause();
1401
1402	if (process == NULLPROC) {
1403
1404		/*
1405		 * We are the child.  We must make sure we get a different
1406		 * file pointer for our references to utmpx.  Otherwise our
1407		 * seeks and reads will compete with those of the parent.
1408		 */
1409		endutxent();
1410
1411		/*
1412		 * Perform the accounting for the beginning of a process.
1413		 * Note that all processes are initially "INIT_PROCESS"es.
1414		 */
1415		tmproc.p_id[0] = cmd->c_id[0];
1416		tmproc.p_id[1] = cmd->c_id[1];
1417		tmproc.p_id[2] = cmd->c_id[2];
1418		tmproc.p_id[3] = cmd->c_id[3];
1419		tmproc.p_pid = getpid();
1420		tmproc.p_exit = 0;
1421		(void) account(INIT_PROCESS, &tmproc,
1422		    prog_name(&cmd->c_command[EXEC]));
1423		maxfiles = ulimit(UL_GDESLIM, 0);
1424		for (i = 0; i < maxfiles; i++)
1425			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
1426
1427		/*
1428		 * Now exec a shell with the -c option and the command
1429		 * from inittab.
1430		 */
1431		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
1432		    glob_envp);
1433		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
1434		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
1435
1436		/*
1437		 * Don't come back so quickly that "init" doesn't have a
1438		 * chance to finish putting this child in "proc_table".
1439		 */
1440		timer(20);
1441		exit(1);
1442
1443	}
1444
1445	/*
1446	 * We are the parent.  Insert the necessary
1447	 * information in the proc_table.
1448	 */
1449	process->p_id[0] = cmd->c_id[0];
1450	process->p_id[1] = cmd->c_id[1];
1451	process->p_id[2] = cmd->c_id[2];
1452	process->p_id[3] = cmd->c_id[3];
1453
1454	st_write();
1455
1456	(void) sigset(SIGCLD, childeath);
1457}
1458
1459/*
1460 * findpslot() finds the old slot in the process table for the
1461 * command with the same id, or it finds an empty slot.
1462 */
1463static struct PROC_TABLE *
1464findpslot(struct CMD_LINE *cmd)
1465{
1466	struct PROC_TABLE	*process;
1467	struct PROC_TABLE	*empty = NULLPROC;
1468
1469	for (process = proc_table;
1470	    (process < proc_table + num_proc); process++) {
1471		if (process->p_flags & OCCUPIED &&
1472		    id_eq(process->p_id, cmd->c_id))
1473			break;
1474
1475		/*
1476		 * If the entry is totally empty and "empty" is still 0,
1477		 * remember where this hole is and make sure the slot is
1478		 * zeroed out.
1479		 */
1480		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
1481			empty = process;
1482			process->p_id[0] = '\0';
1483			process->p_id[1] = '\0';
1484			process->p_id[2] = '\0';
1485			process->p_id[3] = '\0';
1486			process->p_pid = 0;
1487			process->p_time = 0L;
1488			process->p_count = 0;
1489			process->p_flags = 0;
1490			process->p_exit = 0;
1491		}
1492	}
1493
1494	/*
1495	 * If there is no entry for this slot, then there should be an
1496	 * empty slot.  If there is no empty slot, then we've run out
1497	 * of proc_table space.  If the latter is true, empty will be
1498	 * NULL and the caller will have to complain.
1499	 */
1500	if (process == (proc_table + num_proc))
1501		process = empty;
1502
1503	return (process);
1504}
1505
1506/*
1507 * getcmd() parses lines from inittab.  Each time it finds a command line
1508 * it will return TRUE as well as fill the passed CMD_LINE structure and
1509 * the shell command string.  When the end of inittab is reached, FALSE
1510 * is returned inittab is automatically opened if it is not currently open
1511 * and is closed when the end of the file is reached.
1512 */
1513static FILE *fp_inittab = NULL;
1514
1515static int
1516getcmd(struct CMD_LINE *cmd, char *shcmd)
1517{
1518	char	*ptr;
1519	int	c, lastc, state;
1520	char 	*ptr1;
1521	int	answer, i, proceed;
1522	struct	stat	sbuf;
1523	static char *actions[] = {
1524		"off", "respawn", "ondemand", "once", "wait", "boot",
1525		"bootwait", "powerfail", "powerwait", "initdefault",
1526		"sysinit",
1527	};
1528	static short act_masks[] = {
1529		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
1530		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
1531	};
1532	/*
1533	 * Only these actions will be allowed for entries which
1534	 * are specified for single-user mode.
1535	 */
1536	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
1537
1538	if (fp_inittab == NULL) {
1539		/*
1540		 * Before attempting to open inittab we stat it to make
1541		 * sure it currently exists and is not empty.  We try
1542		 * several times because someone may have temporarily
1543		 * unlinked or truncated the file.
1544		 */
1545		for (i = 0; i < 3; i++) {
1546			if (stat(INITTAB, &sbuf) == -1) {
1547				if (i == 2) {
1548					console(B_TRUE,
1549					    "Cannot stat %s, errno: %d\n",
1550					    INITTAB, errno);
1551					return (FAILURE);
1552				} else {
1553					timer(3);
1554				}
1555			} else if (sbuf.st_size < 10) {
1556				if (i == 2) {
1557					console(B_TRUE,
1558					    "%s truncated or corrupted\n",
1559					    INITTAB);
1560					return (FAILURE);
1561				} else {
1562					timer(3);
1563				}
1564			} else {
1565				break;
1566			}
1567		}
1568
1569		/*
1570		 * If unable to open inittab, print error message and
1571		 * return FAILURE to caller.
1572		 */
1573		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
1574			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
1575			    errno);
1576			return (FAILURE);
1577		}
1578	}
1579
1580	/*
1581	 * Keep getting commands from inittab until you find a
1582	 * good one or run out of file.
1583	 */
1584	for (answer = FALSE; answer == FALSE; ) {
1585		/*
1586		 * Zero out the cmd itself before trying next line.
1587		 */
1588		bzero(cmd, sizeof (struct CMD_LINE));
1589
1590		/*
1591		 * Read in lines of inittab, parsing at colons, until a line is
1592		 * read in which doesn't end with a backslash.  Do not start if
1593		 * the first character read is an EOF.  Note that this means
1594		 * that lines which don't end in a newline are still processed,
1595		 * since the "for" will terminate normally once started,
1596		 * regardless of whether line terminates with a newline or EOF.
1597		 */
1598		state = FAILURE;
1599		if ((c = fgetc(fp_inittab)) == EOF) {
1600			answer = FALSE;
1601			(void) fclose(fp_inittab);
1602			fp_inittab = NULL;
1603			break;
1604		}
1605
1606		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
1607		    proceed && c != EOF;
1608		    lastc = c, c = fgetc(fp_inittab)) {
1609		    /* If we're not in the FAILURE state and haven't	*/
1610		    /* yet reached the shell command field, process	*/
1611		    /* the line, otherwise just look for a real end	*/
1612		    /* of line.						*/
1613		    if (state != FAILURE && state != COMMAND) {
1614			/*
1615			 * Squeeze out spaces and tabs.
1616			 */
1617			if (c == ' ' || c == '\t')
1618				continue;
1619
1620			/*
1621			 * Ignore characters in a comment, except for the \n.
1622			 */
1623			if (state == COMMENT) {
1624				if (c == '\n') {
1625					lastc = ' ';
1626					break;
1627				} else {
1628					continue;
1629				}
1630			}
1631
1632			/*
1633			 * Detect comments (lines whose first non-whitespace
1634			 * character is '#') by checking that we're at the
1635			 * beginning of a line, have seen a '#', and haven't
1636			 * yet accumulated any characters.
1637			 */
1638			if (state == ID && c == '#' && ptr == shcmd) {
1639				state = COMMENT;
1640				continue;
1641			}
1642
1643			/*
1644			 * If the character is a ':', then check the
1645			 * previous field for correctness and advance
1646			 * to the next field.
1647			 */
1648			if (c == ':') {
1649			    switch (state) {
1650
1651			    case ID :
1652				/*
1653				 * Check to see that there are only
1654				 * 1 to 4 characters for the id.
1655				 */
1656				if ((i = ptr - shcmd) < 1 || i > 4) {
1657					state = FAILURE;
1658				} else {
1659					bcopy(shcmd, &cmd->c_id[0], i);
1660					ptr = shcmd;
1661					state = LEVELS;
1662				}
1663				break;
1664
1665			    case LEVELS :
1666				/*
1667				 * Build a mask for all the levels for
1668				 * which this command will be legal.
1669				 */
1670				for (cmd->c_levels = 0, ptr1 = shcmd;
1671				    ptr1 < ptr; ptr1++) {
1672					int mask;
1673					if (lvlname_to_mask(*ptr1,
1674					    &mask) == -1) {
1675						state = FAILURE;
1676						break;
1677					}
1678					cmd->c_levels |= mask;
1679				}
1680				if (state != FAILURE) {
1681					state = ACTION;
1682					ptr = shcmd;	/* Reset the buffer */
1683				}
1684				break;
1685
1686			    case ACTION :
1687				/*
1688				 * Null terminate the string in shcmd buffer and
1689				 * then try to match against legal actions.  If
1690				 * the field is of length 0, then the default of
1691				 * "RESPAWN" is used if the id is numeric,
1692				 * otherwise the default is "OFF".
1693				 */
1694				if (ptr == shcmd) {
1695					if (isdigit(cmd->c_id[0]) &&
1696					    (cmd->c_id[1] == '\0' ||
1697						isdigit(cmd->c_id[1])) &&
1698					    (cmd->c_id[2] == '\0' ||
1699						isdigit(cmd->c_id[2])) &&
1700					    (cmd->c_id[3] == '\0' ||
1701						isdigit(cmd->c_id[3])))
1702						    cmd->c_action = M_RESPAWN;
1703					else
1704						    cmd->c_action = M_OFF;
1705				} else {
1706				    for (cmd->c_action = 0, i = 0, *ptr = '\0';
1707				    i < sizeof (actions)/sizeof (char *);
1708				    i++) {
1709					if (strcmp(shcmd, actions[i]) == 0) {
1710					    if ((cmd->c_levels & MASKSU) &&
1711						!(act_masks[i] & su_acts))
1712						    cmd->c_action = 0;
1713					    else
1714						cmd->c_action = act_masks[i];
1715					    break;
1716					}
1717				    }
1718				}
1719
1720				/*
1721				 * If the action didn't match any legal action,
1722				 * set state to FAILURE.
1723				 */
1724				if (cmd->c_action == 0) {
1725					state = FAILURE;
1726				} else {
1727					state = COMMAND;
1728					(void) strcpy(shcmd, "exec ");
1729				}
1730				ptr = shcmd + EXEC;
1731				break;
1732			    }
1733			    continue;
1734			}
1735		    }
1736
1737		    /* If the character is a '\n', then this is the end of a */
1738		    /* line.  If the '\n' wasn't preceded by a backslash, */
1739		    /* it is also the end of an inittab command.  If it was */
1740		    /* preceded by a backslash then the next line is a */
1741		    /* continuation.  Note that the continuation '\n' falls */
1742		    /* through and is treated like other characters and is */
1743		    /* stored in the shell command line. */
1744		    if (c == '\n' && lastc != '\\') {
1745				proceed = FALSE;
1746				*ptr = '\0';
1747				break;
1748		    }
1749
1750		    /* For all other characters just stuff them into the */
1751		    /* command as long as there aren't too many of them. */
1752		    /* Make sure there is room for a terminating '\0' also. */
1753		    if (ptr >= shcmd + MAXCMDL - 1)
1754			state = FAILURE;
1755		    else
1756			*ptr++ = (char)c;
1757
1758		    /* If the character we just stored was a quoted	*/
1759		    /* backslash, then change "c" to '\0', so that this	*/
1760		    /* backslash will not cause a subsequent '\n' to appear */
1761		    /* quoted.  In otherwords '\' '\' '\n' is the real end */
1762		    /* of a command, while '\' '\n' is a continuation. */
1763		    if (c == '\\' && lastc == '\\')
1764			c = '\0';
1765		}
1766
1767		/*
1768		 * Make sure all the fields are properly specified
1769		 * for a good command line.
1770		 */
1771		if (state == COMMAND) {
1772			answer = TRUE;
1773			cmd->c_command = shcmd;
1774
1775			/*
1776			 * If no default level was supplied, insert
1777			 * all numerical levels.
1778			 */
1779			if (cmd->c_levels == 0)
1780				cmd->c_levels = MASK_NUMERIC;
1781
1782			/*
1783			 * If no action has been supplied, declare this
1784			 * entry to be OFF.
1785			 */
1786			if (cmd->c_action == 0)
1787				cmd->c_action = M_OFF;
1788
1789			/*
1790			 * If no shell command has been supplied, make sure
1791			 * there is a null string in the command field.
1792			 */
1793			if (ptr == shcmd + EXEC)
1794				*shcmd = '\0';
1795		} else
1796			answer = FALSE;
1797
1798		/*
1799		 * If we have reached the end of inittab, then close it
1800		 * and quit trying to find a good command line.
1801		 */
1802		if (c == EOF) {
1803			(void) fclose(fp_inittab);
1804			fp_inittab = NULL;
1805			break;
1806		}
1807	}
1808	return (answer);
1809}
1810
1811/*
1812 * lvlname_to_state(): convert the character name of a state to its level
1813 * (its corresponding signal number).
1814 */
1815static int
1816lvlname_to_state(char name)
1817{
1818	int i;
1819	for (i = 0; i < LVL_NELEMS; i++) {
1820		if (lvls[i].lvl_name == name)
1821			return (lvls[i].lvl_state);
1822	}
1823	return (-1);
1824}
1825
1826/*
1827 * state_to_name(): convert the level to the character name.
1828 */
1829static char
1830state_to_name(int state)
1831{
1832	int i;
1833	for (i = 0; i < LVL_NELEMS; i++) {
1834		if (lvls[i].lvl_state == state)
1835			return (lvls[i].lvl_name);
1836	}
1837	return (-1);
1838}
1839
1840/*
1841 * state_to_mask(): return the mask corresponding to a signal number
1842 */
1843static int
1844state_to_mask(int state)
1845{
1846	int i;
1847	for (i = 0; i < LVL_NELEMS; i++) {
1848		if (lvls[i].lvl_state == state)
1849			return (lvls[i].lvl_mask);
1850	}
1851	return (0);	/* return 0, since that represents an empty mask */
1852}
1853
1854/*
1855 * lvlname_to_mask(): return the mask corresponding to a levels character name
1856 */
1857static int
1858lvlname_to_mask(char name, int *mask)
1859{
1860	int i;
1861	for (i = 0; i < LVL_NELEMS; i++) {
1862		if (lvls[i].lvl_name == name) {
1863			*mask = lvls[i].lvl_mask;
1864			return (0);
1865		}
1866	}
1867	return (-1);
1868}
1869
1870/*
1871 * state_to_flags(): return the flags corresponding to a runlevel.  These
1872 * indicate properties of that runlevel.
1873 */
1874static int
1875state_to_flags(int state)
1876{
1877	int i;
1878	for (i = 0; i < LVL_NELEMS; i++) {
1879		if (lvls[i].lvl_state == state)
1880			return (lvls[i].lvl_flags);
1881	}
1882	return (0);
1883}
1884
1885/*
1886 * killproc() creates a child which kills the process specified by pid.
1887 */
1888void
1889killproc(pid_t pid)
1890{
1891	struct PROC_TABLE	*process;
1892
1893	(void) sigset(SIGCLD, SIG_DFL);
1894	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
1895		(void) pause();
1896	(void) sigset(SIGCLD, childeath);
1897
1898	if (process == NULLPROC) {
1899		/*
1900		 * efork() sets all signal handlers to the default, so reset
1901		 * the ALRM handler to make timer() work as expected.
1902		 */
1903		(void) sigset(SIGALRM, alarmclk);
1904
1905		/*
1906		 * We are the child.  Try to terminate the process nicely
1907		 * first using SIGTERM and if it refuses to die in TWARN
1908		 * seconds kill it with SIGKILL.
1909		 */
1910		(void) kill(pid, SIGTERM);
1911		(void) timer(TWARN);
1912		(void) kill(pid, SIGKILL);
1913		(void) exit(0);
1914	}
1915}
1916
1917/*
1918 * Set up the default environment for all procs to be forked from init.
1919 * Read the values from the /etc/default/init file, except for PATH.  If
1920 * there's not enough room in the environment array, the environment
1921 * lines that don't fit are silently discarded.
1922 */
1923void
1924init_env()
1925{
1926	char	line[MAXCMDL];
1927	FILE	*fp;
1928	int	inquotes, length, wslength;
1929	char	*tokp, *cp1, *cp2;
1930
1931	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
1932	(void) strcpy(glob_envp[0], DEF_PATH);
1933	glob_envn = 1;
1934
1935	if (rflg) {
1936		glob_envp[1] =
1937			malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
1938		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
1939		++glob_envn;
1940	} else if (bflg == 1) {
1941		glob_envp[1] =
1942			malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
1943		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
1944		++glob_envn;
1945	}
1946
1947	if ((fp = fopen(ENVFILE, "r")) == NULL) {
1948		console(B_TRUE,
1949		    "Cannot open %s. Environment not initialized.\n",
1950		    ENVFILE);
1951	} else {
1952		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
1953		    glob_envn < MAXENVENT - 2) {
1954			/*
1955			 * Toss newline
1956			 */
1957			length = strlen(line);
1958			if (line[length - 1] == '\n')
1959				line[length - 1] = '\0';
1960
1961			/*
1962			 * Ignore blank or comment lines.
1963			 */
1964			if (line[0] == '#' || line[0] == '\0' ||
1965			    (wslength = strspn(line, " \t\n")) ==
1966			    strlen(line) ||
1967			    strchr(line, '#') == line + wslength)
1968				continue;
1969
1970			/*
1971			 * First make a pass through the line and change
1972			 * any non-quoted semi-colons to blanks so they
1973			 * will be treated as token separators below.
1974			 */
1975			inquotes = 0;
1976			for (cp1 = line; *cp1 != '\0'; cp1++) {
1977				if (*cp1 == '"') {
1978					if (inquotes == 0)
1979						inquotes = 1;
1980					else
1981						inquotes = 0;
1982				} else if (*cp1 == ';') {
1983					if (inquotes == 0)
1984						*cp1 = ' ';
1985				}
1986			}
1987
1988			/*
1989			 * Tokens within the line are separated by blanks
1990			 *  and tabs.  For each token in the line which
1991			 * contains a '=' we strip out any quotes and then
1992			 * stick the token in the environment array.
1993			 */
1994			if ((tokp = strtok(line, " \t")) == NULL)
1995				continue;
1996			do {
1997				if (strchr(tokp, '=') == NULL)
1998					continue;
1999				length = strlen(tokp);
2000				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
2001					for (cp2 = cp1;
2002					    cp2 < &tokp[length]; cp2++)
2003						*cp2 = *(cp2 + 1);
2004					length--;
2005				}
2006
2007				if (strncmp(tokp, "CMASK=",
2008				    sizeof ("CMASK=") - 1) == 0) {
2009					long t;
2010
2011					/* We know there's an = */
2012					t = strtol(strchr(tokp, '=') + 1, NULL,
2013					    8);
2014
2015					/* Sanity */
2016					if (t <= 077 && t >= 0)
2017						cmask = (int)t;
2018					(void) umask(cmask);
2019					continue;
2020				}
2021				glob_envp[glob_envn] =
2022				    malloc((unsigned)(length + 1));
2023				(void) strcpy(glob_envp[glob_envn], tokp);
2024				if (++glob_envn >= MAXENVENT - 1)
2025					break;
2026			} while ((tokp = strtok(NULL, " \t")) != NULL);
2027		}
2028
2029		/*
2030		 * Append a null pointer to the environment array
2031		 * to mark its end.
2032		 */
2033		glob_envp[glob_envn] = NULL;
2034		(void) fclose(fp);
2035	}
2036}
2037
/*
 * boot_init(): Do initialization things that should be done at boot.
 *
 * In order, this routine:
 *   - temporarily replaces glob_envp[0] with a copy of INIT_PATH so that
 *     sysinit commands run with that value (the original pointer is put
 *     back afterwards);
 *   - reads entries with getcmd() and, for each one, either hands the "smf"
 *     entry to process_startd_line(), warns about a legacy "initdefault"
 *     entry, or runs a "sysinit" command through SH and waits for it;
 *   - clears "booting";
 *   - calls write_ioctl_syscon() if write_ioctl is set;
 *   - starts svc.startd with startd_run(), or reports the problem and calls
 *     enter_maintenance() when there is no startd command line or no usable
 *     contract template.
 */
void
boot_init()
{
	int i;
	struct PROC_TABLE *process, *oprocess;
	struct CMD_LINE	cmd;
	char	line[MAXCMDL];
	char	svc_aux[SVC_AUX_SIZE];
	char	init_svc_fmri[SVC_FMRI_SIZE];
	char *old_path;
	int maxfiles;

	/*
	 * Use INIT_PATH for sysinit cmds
	 *
	 * NOTE(review): the malloc() result is not checked before strcpy();
	 * an allocation failure here would dereference NULL.
	 */
	old_path = glob_envp[0];
	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
	(void) strcpy(glob_envp[0], INIT_PATH);

	/*
	 * Scan inittab(4) and process the special svc.startd entry, initdefault
	 * and sysinit entries.
	 */
	while (getcmd(&cmd, &line[0]) == TRUE) {
		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
			/* Remember the startd entry; it is launched below. */
			process_startd_line(&cmd, line);
			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
		} else if (cmd.c_action == M_INITDEFAULT) {
			/*
			 * initdefault is no longer meaningful, as the SMF
			 * milestone controls what (legacy) run level we
			 * boot to.
			 */
			console(B_TRUE,
			    "Ignoring legacy \"initdefault\" entry.\n");
		} else if (cmd.c_action == M_SYSINIT) {
			/*
			 * Execute the "sysinit" entry and wait for it to
			 * complete.  No bookkeeping is performed on these
			 * entries because we avoid writing to the file system
			 * until after there has been a chance to check it.
			 */
			if (process = findpslot(&cmd)) {
				/*
				 * SIGCLD is set to SIG_DFL around efork();
				 * see the comment above efork() for why.
				 */
				(void) sigset(SIGCLD, SIG_DFL);
				(void) snprintf(svc_aux, SVC_AUX_SIZE,
				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
				    cmd.c_id);
				if (legacy_tmpl >= 0) {
					(void) ct_pr_tmpl_set_svc_fmri(
					    legacy_tmpl, init_svc_fmri);
					(void) ct_pr_tmpl_set_svc_aux(
					    legacy_tmpl, svc_aux);
				}

				/* Retry the fork for as long as it reports NO_ROOM. */
				for (oprocess = process;
				    (process = efork(M_OFF, oprocess,
				    (NAMED|NOCLEANUP))) == NO_ROOM;
				    /* CSTYLED */)
					;
				(void) sigset(SIGCLD, childeath);

				if (process == NULLPROC) {
					/*
					 * Child: efork() hands back NULLPROC
					 * in the forked process.  Mark every
					 * descriptor up to the limit
					 * close-on-exec, then exec the shell.
					 */
					maxfiles = ulimit(UL_GDESLIM, 0);

					for (i = 0; i < maxfiles; i++)
						(void) fcntl(i, F_SETFD,
						    FD_CLOEXEC);
					(void) execle(SH, "INITSH", "-c",
					    cmd.c_command,
					    (char *)0, glob_envp);
					/* Only reached when the exec failed. */
					console(B_TRUE,
"Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
						cmd.c_command, errno);
					exit(1);
				} else while (waitproc(process) == FAILURE);
				/* Parent: the command is done, release its slot. */
				process->p_flags = 0;
				st_write();
			}
		}
	}

	/* Restore the path. */
	free(glob_envp[0]);
	glob_envp[0] = old_path;

	/*
	 * This will enable st_write() to complain about init_state_file.
	 */
	booting = 0;

	/*
	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
	 * out a correct version.
	 */
	if (write_ioctl)
		write_ioctl_syscon();

	/*
	 * Start svc.startd(1M), which does most of the work.
	 */
	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
		/* Start svc.startd. */
		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
			cur_state = SINGLE_USER;
	} else {
		console(B_TRUE, "Absent svc.startd entry or bad "
		    "contract template.  Not starting svc.startd.\n");
		enter_maintenance();
	}
}
2152
2153/*
2154 * init_signals(): Initialize all signals to either be caught or ignored.
2155 */
2156void
2157init_signals(void)
2158{
2159	struct sigaction act;
2160	int i;
2161
2162	/*
2163	 * Start by ignoring all signals, then selectively re-enable some.
2164	 * The SIG_IGN disposition will only affect asynchronous signals:
2165	 * any signal that we trigger synchronously that doesn't end up
2166	 * being handled by siglvl() will be forcibly delivered by the kernel.
2167	 */
2168	for (i = SIGHUP; i <= SIGRTMAX; i++)
2169		(void) sigset(i, SIG_IGN);
2170
2171	/*
2172	 * Handle all level-changing signals using siglvl() and set sa_mask so
2173	 * that all level-changing signals are blocked while in siglvl().
2174	 */
2175	act.sa_handler = siglvl;
2176	act.sa_flags = SA_SIGINFO;
2177	(void) sigemptyset(&act.sa_mask);
2178
2179	(void) sigaddset(&act.sa_mask, LVLQ);
2180	(void) sigaddset(&act.sa_mask, LVL0);
2181	(void) sigaddset(&act.sa_mask, LVL1);
2182	(void) sigaddset(&act.sa_mask, LVL2);
2183	(void) sigaddset(&act.sa_mask, LVL3);
2184	(void) sigaddset(&act.sa_mask, LVL4);
2185	(void) sigaddset(&act.sa_mask, LVL5);
2186	(void) sigaddset(&act.sa_mask, LVL6);
2187	(void) sigaddset(&act.sa_mask, SINGLE_USER);
2188	(void) sigaddset(&act.sa_mask, LVLa);
2189	(void) sigaddset(&act.sa_mask, LVLb);
2190	(void) sigaddset(&act.sa_mask, LVLc);
2191
2192	(void) sigaction(LVLQ, &act, NULL);
2193	(void) sigaction(LVL0, &act, NULL);
2194	(void) sigaction(LVL1, &act, NULL);
2195	(void) sigaction(LVL2, &act, NULL);
2196	(void) sigaction(LVL3, &act, NULL);
2197	(void) sigaction(LVL4, &act, NULL);
2198	(void) sigaction(LVL5, &act, NULL);
2199	(void) sigaction(LVL6, &act, NULL);
2200	(void) sigaction(SINGLE_USER, &act, NULL);
2201	(void) sigaction(LVLa, &act, NULL);
2202	(void) sigaction(LVLb, &act, NULL);
2203	(void) sigaction(LVLc, &act, NULL);
2204
2205	(void) sigset(SIGALRM, alarmclk);
2206	alarmclk();
2207
2208	(void) sigset(SIGCLD, childeath);
2209	(void) sigset(SIGPWR, powerfail);
2210}
2211
2212/*
2213 * Set up pipe for "godchildren". If the file exists and is a pipe just open
2214 * it. Else, if the file system is r/w create it.  Otherwise, defer its
2215 * creation and open until after /var/run has been mounted.  This function is
2216 * only called on startup and when explicitly requested via LVLQ.
2217 */
2218void
2219setup_pipe()
2220{
2221	struct stat stat_buf;
2222	struct statvfs statvfs_buf;
2223	struct sigaction act;
2224
2225	/*
2226	 * Always close the previous pipe descriptor as the mounted filesystems
2227	 * may have changed.
2228	 */
2229	if (Pfd >= 0)
2230		(void) close(Pfd);
2231
2232	if ((stat(INITPIPE, &stat_buf) == 0) &&
2233	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
2234		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2235	else
2236		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
2237		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
2238			(void) unlink(INITPIPE);
2239			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
2240			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
2241		}
2242
2243	if (Pfd >= 0) {
2244		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
2245		/*
2246		 * Read pipe in message discard mode.
2247		 */
2248		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
2249
2250		act.sa_handler = sigpoll;
2251		act.sa_flags = 0;
2252		(void) sigemptyset(&act.sa_mask);
2253		(void) sigaddset(&act.sa_mask, SIGCLD);
2254		(void) sigaction(SIGPOLL, &act, NULL);
2255	}
2256}
2257
2258/*
2259 * siglvl - handle an asynchronous signal from init(1M) telling us that we
2260 * should change the current run level.  We set new_state accordingly.
2261 */
2262void
2263siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
2264{
2265	struct PROC_TABLE *process;
2266	struct sigaction act;
2267
2268	/*
2269	 * If the signal was from the kernel (rather than init(1M)) then init
2270	 * itself tripped the signal.  That is, we might have a bug and tripped
2271	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
2272	 * such a case we reset the disposition to SIG_DFL, block all signals
2273	 * in uc_mask but the current one, and return to the interrupted ucp
2274	 * to effect an appropriate death.  The kernel will then restart us.
2275	 *
2276	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
2277	 * the kernel can send us when it wants to effect an orderly reboot.
2278	 * For this case we must also verify si_code is zero, rather than a
2279	 * code such as FPE_INTDIV which a bug might have triggered.
2280	 */
2281	if (sip != NULL && SI_FROMKERNEL(sip) &&
2282	    (sig != SIGFPE || sip->si_code == 0)) {
2283
2284		(void) sigemptyset(&act.sa_mask);
2285		act.sa_handler = SIG_DFL;
2286		act.sa_flags = 0;
2287		(void) sigaction(sig, &act, NULL);
2288
2289		(void) sigfillset(&ucp->uc_sigmask);
2290		(void) sigdelset(&ucp->uc_sigmask, sig);
2291		ucp->uc_flags |= UC_SIGMASK;
2292
2293		(void) setcontext(ucp);
2294	}
2295
2296	/*
2297	 * If the signal received is a LVLQ signal, do not really
2298	 * change levels, just restate the current level.  If the
2299	 * signal is not a LVLQ, set the new level to the signal
2300	 * received.
2301	 */
2302	if (sig == LVLQ) {
2303		new_state = cur_state;
2304		lvlq_received = B_TRUE;
2305	} else {
2306		new_state = sig;
2307	}
2308
2309	/*
2310	 * Clear all times and repeat counts in the process table
2311	 * since either the level is changing or the user has editted
2312	 * the inittab file and wants us to look at it again.
2313	 * If the user has fixed a typo, we don't want residual timing
2314	 * data preventing the fixed command line from executing.
2315	 */
2316	for (process = proc_table;
2317		(process < proc_table + num_proc); process++) {
2318		process->p_time = 0L;
2319		process->p_count = 0;
2320	}
2321
2322	/*
2323	 * Set the flag to indicate that a "user signal" was received.
2324	 */
2325	wakeup.w_flags.w_usersignal = 1;
2326}
2327
2328
2329/*
2330 * alarmclk
2331 */
2332static void
2333alarmclk()
2334{
2335	time_up = TRUE;
2336}
2337
2338/*
2339 * childeath_single():
2340 *
2341 * This used to be the SIGCLD handler and it was set with signal()
2342 * (as opposed to sigset()).  When a child exited we'd come to the
2343 * handler, wait for the child, and reenable the handler with
2344 * signal() just before returning.  The implementation of signal()
2345 * checks with waitid() for waitable children and sends a SIGCLD
2346 * if there are some.  If children are exiting faster than the
2347 * handler can run we keep sending signals and the handler never
2348 * gets to return and eventually the stack runs out and init dies.
2349 * To prevent that we set the handler with sigset() so the handler
2350 * doesn't need to be reset, and in childeath() (see below) we
2351 * call childeath_single() as long as there are children to be
2352 * waited for.  If a child exits while init is in the handler a
2353 * SIGCLD will be pending and delivered on return from the handler.
2354 * If the child was already waited for the handler will have nothing
2355 * to do and return, otherwise the child will be waited for.
2356 */
2357static void
2358childeath_single()
2359{
2360	struct PROC_TABLE	*process;
2361	struct pidlist		*pp;
2362	pid_t			pid;
2363	int			status;
2364
2365	/*
2366	 * Perform wait to get the process id of the child that died and
2367	 * then scan the process table to see if we are interested in
2368	 * this process. NOTE: if a super-user sends the SIGCLD signal
2369	 * to init, the following wait will not immediately return and
2370	 * init will be inoperative until one of its child really does die.
2371	 */
2372	pid = wait(&status);
2373
2374	for (process = proc_table;
2375		(process < proc_table + num_proc); process++) {
2376		if ((process->p_flags & (LIVING|OCCUPIED)) ==
2377		    (LIVING|OCCUPIED) && process->p_pid == pid) {
2378
2379			/*
2380			 * Mark this process as having died and store the exit
2381			 * status.  Also set the wakeup flag for a dead child
2382			 * and break out of the loop.
2383			 */
2384			process->p_flags &= ~LIVING;
2385			process->p_exit = (short)status;
2386			wakeup.w_flags.w_childdeath = 1;
2387
2388			return;
2389		}
2390	}
2391
2392	/*
2393	 * No process was found above, look through auxiliary list.
2394	 */
2395	(void) sighold(SIGPOLL);
2396	pp = Plhead;
2397	while (pp) {
2398		if (pid > pp->pl_pid) {
2399			/*
2400			 * Keep on looking.
2401			 */
2402			pp = pp->pl_next;
2403			continue;
2404		} else if (pid < pp->pl_pid) {
2405			/*
2406			 * Not in the list.
2407			 */
2408			break;
2409		} else {
2410			/*
2411			 * This is a dead "godchild".
2412			 */
2413			pp->pl_dflag = 1;
2414			pp->pl_exit = (short)status;
2415			wakeup.w_flags.w_childdeath = 1;
2416			Gchild = 1;	/* Notice to call cleanaux(). */
2417			break;
2418		}
2419	}
2420
2421	(void) sigrelse(SIGPOLL);
2422}
2423
2424/* ARGSUSED */
2425static void
2426childeath(int signo)
2427{
2428	siginfo_t info;
2429
2430	while ((waitid(P_ALL, (id_t)0, &info, WEXITED|WNOHANG|WNOWAIT) == 0) &&
2431	    info.si_pid != 0)
2432		childeath_single();
2433}
2434
2435static void
2436powerfail()
2437{
2438	(void) nice(-19);
2439	wakeup.w_flags.w_powerhit = 1;
2440}
2441
2442/*
2443 * efork() forks a child and the parent inserts the process in its table
2444 * of processes that are directly a result of forks that it has performed.
2445 * The child just changes the "global" with the process id for this process
2446 * to it's new value.
2447 * If efork() is called with a pointer into the proc_table it uses that slot,
2448 * otherwise it searches for a free slot.  Regardless of how it was called,
2449 * it returns the pointer to the proc_table entry
2450 *
2451 * The SIGCLD handler is set to default (SIG_DFL) before calling efork().
2452 * This relies on the somewhat obscure SVR2 SIGCLD/SIG_DFL semantic
2453 * implied by the use of signal(3c).  While the meaning of SIG_DFL for
2454 * SIGCLD is nominally to ignore the signal, once the signal disposition
2455 * is set to childeath(), the kernel will post a SIGCLD if a child
2456 * exited during the period the disposition was SIG_DFL.  It acts more
2457 * like a signal block.
2458 *
2459 * Ideally, this should be rewritten to use modern signal semantics.
2460 */
2461static struct PROC_TABLE *
2462efork(int action, struct PROC_TABLE *process, int modes)
2463{
2464	pid_t	childpid;
2465	struct PROC_TABLE *proc;
2466	int		i;
2467	void (*oldroutine)();
2468	/*
2469	 * Freshen up the proc_table, removing any entries for dead processes
2470	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
2471	 */
2472	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
2473		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
2474		    (OCCUPIED)) {
2475			/*
2476			 * Is this a named process?
2477			 * If so, do the necessary bookkeeping.
2478			 */
2479			if (proc->p_flags & NAMED)
2480				(void) account(DEAD_PROCESS, proc, NULL);
2481
2482			/*
2483			 * Free this entry for new usage.
2484			 */
2485			proc->p_flags = 0;
2486		}
2487	}
2488
2489	while ((childpid = fork()) == FAILURE) {
2490		/*
2491		 * Shorten the alarm timer in case someone else's child dies
2492		 * and free up a slot in the process table.
2493		 */
2494		setimer(5);
2495
2496		/*
2497		 * Wait for some children to die.  Since efork() is normally
2498		 * called with SIGCLD in the default state, reset it to catch
2499		 * so that child death signals can come in.
2500		 */
2501		oldroutine = sigset(SIGCLD, childeath);
2502		(void) pause();
2503		(void) sigset(SIGCLD, oldroutine);
2504		setimer(0);
2505	}
2506
2507	if (childpid != 0) {
2508
2509		if (process == NULLPROC) {
2510			/*
2511			 * No proc table pointer specified so search
2512			 * for a free slot.
2513			 */
2514			for (process = proc_table;  process->p_flags != 0 &&
2515				(process < proc_table + num_proc); process++)
2516					;
2517
2518			if (process == (proc_table + num_proc)) {
2519				int old_proc_table_size = num_proc;
2520
2521				/* Increase the process table size */
2522				increase_proc_table_size();
2523				if (old_proc_table_size == num_proc) {
2524					/* didn't grow: memory failure */
2525					return (NO_ROOM);
2526				} else {
2527					process =
2528					    proc_table + old_proc_table_size;
2529				}
2530			}
2531
2532			process->p_time = 0L;
2533			process->p_count = 0;
2534		}
2535		process->p_id[0] = '\0';
2536		process->p_id[1] = '\0';
2537		process->p_id[2] = '\0';
2538		process->p_id[3] = '\0';
2539		process->p_pid = childpid;
2540		process->p_flags = (LIVING | OCCUPIED | modes);
2541		process->p_exit = 0;
2542
2543		st_write();
2544	} else {
2545		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
2546			(void) setpgrp();
2547
2548		process = NULLPROC;
2549
2550		/*
2551		 * Reset all signals to the system defaults.
2552		 */
2553		for (i = SIGHUP; i <= SIGRTMAX; i++)
2554			(void) sigset(i, SIG_DFL);
2555
2556		/*
2557		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
2558		 * SIGTTIN, and SIGTSTP to SIG_IGN.
2559		 *
2560		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
2561		 * for backward compatibility.
2562		 */
2563		(void) sigset(SIGTTIN, SIG_IGN);
2564		(void) sigset(SIGTTOU, SIG_IGN);
2565		(void) sigset(SIGTSTP, SIG_IGN);
2566		(void) sigset(SIGXCPU, SIG_IGN);
2567		(void) sigset(SIGXFSZ, SIG_IGN);
2568	}
2569	return (process);
2570}
2571
2572
2573/*
2574 * waitproc() waits for a specified process to die.  For this function to
2575 * work, the specified process must already in the proc_table.  waitproc()
2576 * returns the exit status of the specified process when it dies.
2577 */
2578static long
2579waitproc(struct PROC_TABLE *process)
2580{
2581	int		answer;
2582	sigset_t	oldmask, newmask, zeromask;
2583
2584	(void) sigemptyset(&zeromask);
2585	(void) sigemptyset(&newmask);
2586
2587	(void) sigaddset(&newmask, SIGCLD);
2588
2589	/* Block SIGCLD and save the current signal mask */
2590	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
2591		perror("SIG_BLOCK error");
2592
2593	/*
2594	 * Wait around until the process dies.
2595	 */
2596	if (process->p_flags & LIVING)
2597		(void) sigsuspend(&zeromask);
2598
2599	/* Reset signal mask to unblock SIGCLD */
2600	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
2601		perror("SIG_SETMASK error");
2602
2603	if (process->p_flags & LIVING)
2604		return (FAILURE);
2605
2606	/*
2607	 * Make sure to only return 16 bits so that answer will always
2608	 * be positive whenever the process of interest really died.
2609	 */
2610	answer = (process->p_exit & 0xffff);
2611
2612	/*
2613	 * Free the slot in the proc_table.
2614	 */
2615	process->p_flags = 0;
2616	return (answer);
2617}
2618
2619/*
2620 * notify_pam_dead(): calls into the PAM framework to close the given session.
2621 */
2622static void
2623notify_pam_dead(struct utmpx *up)
2624{
2625	pam_handle_t *pamh;
2626	char user[sizeof (up->ut_user) + 1];
2627	char ttyn[sizeof (up->ut_line) + 1];
2628	char host[sizeof (up->ut_host) + 1];
2629
2630	/*
2631	 * PAM does not take care of updating utmpx/wtmpx.
2632	 */
2633	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
2634	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
2635	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
2636
2637	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
2638		(void) pam_set_item(pamh, PAM_TTY, ttyn);
2639		(void) pam_set_item(pamh, PAM_RHOST, host);
2640		(void) pam_close_session(pamh, 0);
2641		(void) pam_end(pamh, PAM_SUCCESS);
2642	}
2643}
2644
2645/*
2646 * Check you can access utmpx (As / may be read-only and
2647 * /var may not be mounted yet).
2648 */
2649static int
2650access_utmpx(void)
2651{
2652	do {
2653		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
2654	} while (!utmpx_ok && errno == EINTR);
2655
2656	return (utmpx_ok);
2657}
2658
2659/*
2660 * account() updates entries in utmpx and appends new entries to the end of
2661 * wtmpx (assuming they exist).  The program argument indicates the name of
2662 * program if INIT_PROCESS, otherwise should be NULL.
2663 *
2664 * account() only blocks for INIT_PROCESS requests.
2665 *
2666 * Returns non-zero if write failed.
2667 */
2668static int
2669account(short state, struct PROC_TABLE *process, char *program)
2670{
2671	struct utmpx utmpbuf, *u, *oldu;
2672	int tmplen;
2673	char fail_buf[UT_LINE_SZ];
2674	sigset_t block, unblock;
2675
2676	if (!utmpx_ok && !access_utmpx()) {
2677		return (-1);
2678	}
2679
2680	/*
2681	 * Set up the prototype for the utmp structure we want to write.
2682	 */
2683	u = &utmpbuf;
2684	(void) memset(u, 0, sizeof (struct utmpx));
2685
2686	/*
2687	 * Fill in the various fields of the utmp structure.
2688	 */
2689	u->ut_id[0] = process->p_id[0];
2690	u->ut_id[1] = process->p_id[1];
2691	u->ut_id[2] = process->p_id[2];
2692	u->ut_id[3] = process->p_id[3];
2693	u->ut_pid = process->p_pid;
2694
2695	/*
2696	 * Fill the "ut_exit" structure.
2697	 */
2698	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
2699	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
2700	u->ut_type = state;
2701
2702	(void) time(&u->ut_tv.tv_sec);
2703
2704	/*
2705	 * Block signals for utmp update.
2706	 */
2707	(void) sigfillset(&block);
2708	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2709
2710	/*
2711	 * See if there already is such an entry in the "utmpx" file.
2712	 */
2713	setutxent();	/* Start at beginning of utmpx file. */
2714
2715	if ((oldu = getutxid(u)) != NULL) {
2716		/*
2717		 * Copy in the old "user", "line" and "host" fields
2718		 * to our new structure.
2719		 */
2720		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
2721		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
2722		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
2723		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
2724			min(tmplen + 1, sizeof (u->ut_host)) : 0;
2725
2726		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
2727			notify_pam_dead(oldu);
2728		}
2729	}
2730
2731	/*
2732	 * Perform special accounting. Insert the special string into the
2733	 * ut_line array. For INIT_PROCESSes put in the name of the
2734	 * program in the "ut_user" field.
2735	 */
2736	switch (state) {
2737	case INIT_PROCESS:
2738		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
2739		(void) strcpy(fail_buf, "INIT_PROCESS");
2740		break;
2741
2742	default:
2743		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
2744		break;
2745	}
2746
2747	/*
2748	 * Write out the updated entry to utmpx file.
2749	 */
2750	if (pututxline(u) == NULL) {
2751		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
2752		    fail_buf, strerror(errno));
2753		endutxent();
2754		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2755		return (-1);
2756	}
2757
2758	/*
2759	 * If we're able to write to utmpx, then attempt to add to the
2760	 * end of the wtmpx file.
2761	 */
2762	updwtmpx(WTMPX, u);
2763
2764	endutxent();
2765
2766	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2767
2768	return (0);
2769}
2770
2771static void
2772clearent(pid_t pid, short status)
2773{
2774	struct utmpx *up;
2775	sigset_t block, unblock;
2776
2777	/*
2778	 * Block signals for utmp update.
2779	 */
2780	(void) sigfillset(&block);
2781	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
2782
2783	/*
2784	 * No error checking for now.
2785	 */
2786
2787	setutxent();
2788	while (up = getutxent()) {
2789		if (up->ut_pid == pid) {
2790			if (up->ut_type == DEAD_PROCESS) {
2791				/*
2792				 * Cleaned up elsewhere.
2793				 */
2794				continue;
2795			}
2796
2797			notify_pam_dead(up);
2798
2799			up->ut_type = DEAD_PROCESS;
2800			up->ut_exit.e_termination = WTERMSIG(status);
2801			up->ut_exit.e_exit = WEXITSTATUS(status);
2802			(void) time(&up->ut_tv.tv_sec);
2803
2804			(void) pututxline(up);
2805			/*
2806			 * Now attempt to add to the end of the
2807			 * wtmp and wtmpx files.  Do not create
2808			 * if they don't already exist.
2809			 */
2810			updwtmpx(WTMPX, up);
2811
2812			break;
2813		}
2814	}
2815
2816	endutxent();
2817	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
2818}
2819
/*
 * prog_name() picks out the first word of "string", which may be a unix
 * path name, and returns its last path component.
 *
 * The result lives in a static buffer that is overwritten by the next call
 * and holds at most _POSIX_LOGIN_NAME_MAX - 1 characters.  An empty string
 * is returned when the first word does not start with a letter, a digit,
 * '.', '/' or '_'.
 */
static char *
prog_name(char *string)
{
	/* XXX - utmp - fix name length */
	static char word[_POSIX_LOGIN_NAME_MAX];
	char	*base, *out, *limit;
	char	first;

	/*
	 * Step over leading spaces and tabs.
	 */
	while (*string == ' ' || *string == '\t')
		string++;

	/*
	 * Reject anything that does not begin with a character allowed at
	 * the start of a word.
	 */
	first = *string;
	if (!(first == '.' || first == '/' || first == '_' ||
	    (first >= 'a' && first <= 'z') ||
	    (first >= 'A' && first <= 'Z') ||
	    (first >= '0' && first <= '9')))
		return ("");

	/*
	 * Advance to the end of the word (' ', '\t', '\n' or '\0').  "base"
	 * trails behind, moving to just past each '/' seen, so that it ends
	 * up at the start of the last path component.
	 */
	base = string;
	while (*string != ' ' && *string != '\t' && *string != '\n' &&
	    *string != '\0') {
		if (*string == '/')
			base = string + 1;
		string++;
	}

	/*
	 * Copy as much of the component as fits into "word", terminate it
	 * and hand it back.
	 */
	/* XXX - utmp - fix name length */
	out = &word[0];
	limit = &word[_POSIX_LOGIN_NAME_MAX - 1];
	while (out < limit && base < string)
		*out++ = *base++;

	*out = '\0';
	return (&word[0]);
}
2874
2875
2876/*
2877 * realcon() returns a nonzero value if there is a character device
2878 * associated with SYSCON that has the same device number as CONSOLE.
2879 */
2880static int
2881realcon()
2882{
2883	struct stat sconbuf, conbuf;
2884
2885	if (stat(SYSCON, &sconbuf) != -1 &&
2886	    stat(CONSOLE, &conbuf) != -1 &&
2887	    S_ISCHR(sconbuf.st_mode) &&
2888	    S_ISCHR(conbuf.st_mode) &&
2889	    sconbuf.st_rdev == conbuf.st_rdev) {
2890		return (1);
2891	} else {
2892		return (0);
2893	}
2894}
2895
2896
2897/*
2898 * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
2899 * Returns true if the IOCTLSYSCON file needs to be written (with
2900 * write_ioctl_syscon() below)
2901 */
2902static int
2903get_ioctl_syscon()
2904{
2905	FILE	*fp;
2906	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
2907	int		i, valid_format = 0;
2908
2909	/*
2910	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
2911	 */
2912	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
2913		stored_syscon_termios = dflt_termios;
2914		console(B_TRUE,
2915		    "warning:%s does not exist, default settings assumed\n",
2916		    IOCTLSYSCON);
2917	} else {
2918
2919	    i = fscanf(fp,
2920	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2921		&iflags, &oflags, &cflags, &lflags,
2922		&cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
2923		&cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
2924		&cc[14], &cc[15], &cc[16], &cc[17]);
2925
2926	    if (i == 22) {
2927		stored_syscon_termios.c_iflag = iflags;
2928		stored_syscon_termios.c_oflag = oflags;
2929		stored_syscon_termios.c_cflag = cflags;
2930		stored_syscon_termios.c_lflag = lflags;
2931		for (i = 0; i < 18; i++)
2932			stored_syscon_termios.c_cc[i] = (char)cc[i];
2933		valid_format = 1;
2934	    } else if (i == 13) {
2935		rewind(fp);
2936		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
2937		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
2938		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
2939
2940		/*
2941		 * If the file is formatted properly, use the values to
2942		 * initialize the console terminal condition.
2943		 */
2944		stored_syscon_termios.c_iflag = (ushort_t)iflags;
2945		stored_syscon_termios.c_oflag = (ushort_t)oflags;
2946		stored_syscon_termios.c_cflag = (ushort_t)cflags;
2947		stored_syscon_termios.c_lflag = (ushort_t)lflags;
2948		for (i = 0; i < 8; i++)
2949			stored_syscon_termios.c_cc[i] = (char)cc[i];
2950		valid_format = 1;
2951	    }
2952	    (void) fclose(fp);
2953
2954	    /* If the file is badly formatted, use the default settings. */
2955	    if (!valid_format)
2956		stored_syscon_termios = dflt_termios;
2957	}
2958
2959	/* If the file had a bad format, rewrite it later. */
2960	return (!valid_format);
2961}
2962
2963
2964static void
2965write_ioctl_syscon()
2966{
2967	FILE *fp;
2968	int i;
2969
2970	(void) unlink(SYSCON);
2971	(void) link(SYSTTY, SYSCON);
2972	(void) umask(022);
2973	fp = fopen(IOCTLSYSCON, "w");
2974
2975	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
2976	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
2977	    stored_syscon_termios.c_lflag);
2978	for (i = 0; i < 8; ++i)
2979		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
2980	(void) putc('\n', fp);
2981
2982	(void) fflush(fp);
2983	(void) fsync(fileno(fp));
2984	(void) fclose(fp);
2985	(void) umask(cmask);
2986}
2987
2988
2989/*
2990 * void console(boolean_t, char *, ...)
2991 *   Outputs the requested message to the system console.  Note that the number
2992 *   of arguments passed to console() should be determined by the print format.
2993 *
2994 *   The "prefix" parameter indicates whether or not "INIT: " should precede the
2995 *   message.
2996 *
2997 *   To make sure we write to the console in a sane fashion, we use the modes
2998 *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
2999 *   Afterwards we restore whatever modes were already there.
3000 */
3001/* PRINTFLIKE2 */
3002static void
3003console(boolean_t prefix, char *format, ...)
3004{
3005	char	outbuf[BUFSIZ];
3006	va_list	args;
3007	int fd, getret;
3008	struct termios old_syscon_termios;
3009	FILE *f;
3010
3011	/*
3012	 * We open SYSCON anew each time in case it has changed (see
3013	 * userinit()).
3014	 */
3015	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
3016	    (f = fdopen(fd, "r+")) == NULL) {
3017		if (prefix)
3018			syslog(LOG_WARNING, "INIT: ");
3019		va_start(args, format);
3020		vsyslog(LOG_WARNING, format, args);
3021		va_end(args);
3022		if (fd >= 0)
3023			(void) close(fd);
3024		return;
3025	}
3026	setbuf(f, &outbuf[0]);
3027
3028	getret = tcgetattr(fd, &old_syscon_termios);
3029	old_syscon_termios.c_cflag &= ~HUPCL;
3030	if (realcon())
3031		/* Don't overwrite cflag of real console. */
3032		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
3033
3034	stored_syscon_termios.c_cflag &= ~HUPCL;
3035
3036	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
3037
3038	if (prefix)
3039		(void) fprintf(f, "\nINIT: ");
3040	va_start(args, format);
3041	(void) vfprintf(f, format, args);
3042	va_end(args);
3043
3044	if (getret == 0)
3045		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
3046
3047	(void) fclose(f);
3048}
3049
3050/*
3051 * timer() is a substitute for sleep() which uses alarm() and pause().
3052 */
3053static void
3054timer(int waitime)
3055{
3056	setimer(waitime);
3057	while (time_up == FALSE)
3058		(void) pause();
3059}
3060
3061static void
3062setimer(int timelimit)
3063{
3064	alarmclk();
3065	(void) alarm(timelimit);
3066	time_up = (timelimit ? FALSE : TRUE);
3067}
3068
/*
 * Point inst at the SCF_SERVICE_STARTD instance
 * (svc:/system/svc/restarter:default).  If the FMRI cannot be decoded
 * because it does not exist yet, walk the repository piece by piece --
 * local scope, service "system/svc/restarter", instance "default" --
 * adding the service and/or the instance when they are missing.
 *
 * Returns 0 on success.  Fails with
 *   ENOMEM - out of memory
 *   ECONNABORTED - repository connection broken
 *   EPERM - permission denied
 *   EACCES - backend access denied
 *   EROFS - backend readonly
 *
 * Any other libscf error is handed to bad_error().
 */
static int
get_or_add_startd(scf_instance_t *inst)
{
	scf_handle_t *h;
	scf_scope_t *scope = NULL;
	scf_service_t *svc = NULL;
	int ret = 0;

	h = scf_instance_handle(inst);

	/* Fast path: the instance already exists and decodes directly. */
	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
		return (0);

	switch (scf_error()) {
	case SCF_ERROR_CONNECTION_BROKEN:
		return (ECONNABORTED);

	case SCF_ERROR_NOT_FOUND:
		/* Fall out of the switch and build the missing pieces. */
		break;

	case SCF_ERROR_HANDLE_MISMATCH:
	case SCF_ERROR_INVALID_ARGUMENT:
	case SCF_ERROR_CONSTRAINT_VIOLATED:
	default:
		bad_error("scf_handle_decode_fmri", scf_error());
	}

	/* Make sure we're right, since we're adding piece-by-piece. */
	assert(strcmp(SCF_SERVICE_STARTD,
	    "svc:/system/svc/restarter:default") == 0);

	/* Nothing has been allocated before this point; "out" frees both. */
	if ((scope = scf_scope_create(h)) == NULL ||
	    (svc = scf_service_create(h)) == NULL) {
		ret = ENOMEM;
		goto out;
	}

	/* Re-entered from get_svc when the scope object reports DELETED. */
get_scope:
	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			ret = ECONNABORTED;
			goto out;

		case SCF_ERROR_NOT_FOUND:
			/* Without a local scope there is nothing to add to. */
			(void) fputs(gettext(
			    "smf(5) repository missing local scope.\n"),
			    stderr);
			exit(1);
			/* NOTREACHED */

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		default:
			bad_error("scf_handle_get_scope", scf_error());
		}
	}

	/* Re-entered from add_svc when the add reports EXISTS. */
get_svc:
	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			ret = ECONNABORTED;
			goto out;

		case SCF_ERROR_DELETED:
			goto get_scope;

		case SCF_ERROR_NOT_FOUND:
			/* Service is missing; add it below. */
			break;

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		case SCF_ERROR_NOT_SET:
		default:
			bad_error("scf_scope_get_service", scf_error());
		}

		/*
		 * Also the restart point when the service is reported
		 * DELETED while getting or adding the instance below.
		 */
add_svc:
		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
		    0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
				ret = ECONNABORTED;
				goto out;

			case SCF_ERROR_EXISTS:
				/* Someone else added it first; fetch it. */
				goto get_svc;

			case SCF_ERROR_PERMISSION_DENIED:
				ret = EPERM;
				goto out;

			case SCF_ERROR_BACKEND_ACCESS:
				ret = EACCES;
				goto out;

			case SCF_ERROR_BACKEND_READONLY:
				ret = EROFS;
				goto out;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_scope_add_service", scf_error());
			}
		}
	}

	/* Re-entered when adding the instance reports EXISTS. */
get_inst:
	if (scf_service_get_instance(svc, "default", inst) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			ret = ECONNABORTED;
			goto out;

		case SCF_ERROR_DELETED:
			goto add_svc;

		case SCF_ERROR_NOT_FOUND:
			/* Instance is missing; add it below. */
			break;

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		case SCF_ERROR_NOT_SET:
		default:
			bad_error("scf_service_get_instance", scf_error());
		}

		if (scf_service_add_instance(svc, "default", inst) !=
		    0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
				ret = ECONNABORTED;
				goto out;

			case SCF_ERROR_DELETED:
				goto add_svc;

			case SCF_ERROR_EXISTS:
				goto get_inst;

			case SCF_ERROR_PERMISSION_DENIED:
				ret = EPERM;
				goto out;

			case SCF_ERROR_BACKEND_ACCESS:
				ret = EACCES;
				goto out;

			case SCF_ERROR_BACKEND_READONLY:
				ret = EROFS;
				goto out;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_service_add_instance",
				    scf_error());
			}
		}
	}

	ret = 0;

	/* Common exit: release the temporary scope and service objects. */
out:
	scf_service_destroy(svc);
	scf_scope_destroy(scope);
	return (ret);
}
3250
3251/*
3252 * Fails with
3253 *   ECONNABORTED - repository connection broken
3254 *   ECANCELED - the transaction's property group was deleted
3255 */
3256static int
3257transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
3258    const char *pname, scf_type_t type)
3259{
3260change_type:
3261	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
3262		return (0);
3263
3264	switch (scf_error()) {
3265	case SCF_ERROR_CONNECTION_BROKEN:
3266		return (ECONNABORTED);
3267
3268	case SCF_ERROR_DELETED:
3269		return (ECANCELED);
3270
3271	case SCF_ERROR_NOT_FOUND:
3272		goto new;
3273
3274	case SCF_ERROR_HANDLE_MISMATCH:
3275	case SCF_ERROR_INVALID_ARGUMENT:
3276	case SCF_ERROR_NOT_BOUND:
3277	case SCF_ERROR_NOT_SET:
3278	default:
3279		bad_error("scf_transaction_property_change_type", scf_error());
3280	}
3281
3282new:
3283	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
3284		return (0);
3285
3286	switch (scf_error()) {
3287	case SCF_ERROR_CONNECTION_BROKEN:
3288		return (ECONNABORTED);
3289
3290	case SCF_ERROR_DELETED:
3291		return (ECANCELED);
3292
3293	case SCF_ERROR_EXISTS:
3294		goto change_type;
3295
3296	case SCF_ERROR_HANDLE_MISMATCH:
3297	case SCF_ERROR_INVALID_ARGUMENT:
3298	case SCF_ERROR_NOT_BOUND:
3299	case SCF_ERROR_NOT_SET:
3300	default:
3301		bad_error("scf_transaction_property_new", scf_error());
3302		/* NOTREACHED */
3303	}
3304}
3305
3306static void
3307scferr(void)
3308{
3309	switch (scf_error()) {
3310	case SCF_ERROR_NO_MEMORY:
3311		console(B_TRUE, gettext("Out of memory.\n"));
3312		break;
3313
3314	case SCF_ERROR_CONNECTION_BROKEN:
3315		console(B_TRUE, gettext(
3316		    "Connection to smf(5) repository server broken.\n"));
3317		break;
3318
3319	case SCF_ERROR_NO_RESOURCES:
3320		console(B_TRUE, gettext(
3321		    "smf(5) repository server is out of memory.\n"));
3322		break;
3323
3324	case SCF_ERROR_PERMISSION_DENIED:
3325		console(B_TRUE, gettext("Insufficient privileges.\n"));
3326		break;
3327
3328	default:
3329		console(B_TRUE, gettext("libscf error: %s\n"),
3330		    scf_strerror(scf_error()));
3331	}
3332}
3333
/*
 * Store run level rl as a one-character astring in the "runlevel" property
 * of the SCF_PG_OPTIONS_OVR property group on the instance located (or
 * created) by get_or_add_startd().
 *
 * The function creates and binds its own repository handle, adds the
 * property group if it is missing, and commits the value in a transaction
 * that is retried until scf_transaction_commit() returns 1.  Objects that
 * are reported DELETED along the way are looked up or re-added and the
 * operation resumes from that point.
 *
 * Nothing is returned: expected failures are reported through scferr() or
 * console() and the function simply cleans up and returns; unexpected
 * libscf errors go to bad_error().
 */
static void
lscf_set_runlevel(char rl)
{
	scf_handle_t *h;
	scf_instance_t *inst = NULL;
	scf_propertygroup_t *pg = NULL;
	scf_transaction_t *tx = NULL;
	scf_transaction_entry_t *ent = NULL;
	scf_value_t *val = NULL;
	char buf[2];
	int r;

	h = scf_handle_create(SCF_VERSION);
	if (h == NULL) {
		scferr();
		return;
	}

	if (scf_handle_bind(h) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_NO_SERVER:
			console(B_TRUE,
			    gettext("smf(5) repository server not running.\n"));
			goto bail;

		default:
			scferr();
			goto bail;
		}
	}

	/* Allocate every libscf object up front; "bail" destroys them all. */
	if ((inst = scf_instance_create(h)) == NULL ||
	    (pg = scf_pg_create(h)) == NULL ||
	    (val = scf_value_create(h)) == NULL ||
	    (tx = scf_transaction_create(h)) == NULL ||
	    (ent = scf_entry_create(h)) == NULL) {
		scferr();
		goto bail;
	}

	/* Restart point when the instance is reported DELETED below. */
get_inst:
	r = get_or_add_startd(inst);
	switch (r) {
	case 0:
		break;

	case ENOMEM:
	case ECONNABORTED:
	case EPERM:
	case EACCES:
	case EROFS:
		scferr();
		goto bail;
	default:
		bad_error("get_or_add_startd", r);
	}

	/* Restart point when adding the property group reports EXISTS. */
get_pg:
	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			scferr();
			goto bail;

		case SCF_ERROR_DELETED:
			goto get_inst;

		case SCF_ERROR_NOT_FOUND:
			/* Property group is missing; add it below. */
			break;

		case SCF_ERROR_HANDLE_MISMATCH:
		case SCF_ERROR_INVALID_ARGUMENT:
		case SCF_ERROR_NOT_SET:
		default:
			bad_error("scf_instance_get_pg", scf_error());
		}

		/*
		 * Also the restart point when the property group is reported
		 * DELETED from inside the transaction loop further down.
		 */
add_pg:
		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
		    0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
			case SCF_ERROR_PERMISSION_DENIED:
			case SCF_ERROR_BACKEND_ACCESS:
				scferr();
				goto bail;

			case SCF_ERROR_DELETED:
				goto get_inst;

			case SCF_ERROR_EXISTS:
				goto get_pg;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_instance_add_pg", scf_error());
			}
		}
	}

	/* Build the one-character, NUL-terminated run-level string. */
	buf[0] = rl;
	buf[1] = '\0';
	r = scf_value_set_astring(val, buf);
	assert(r == 0);

	/* Keep trying until the commit reports success (returns 1). */
	for (;;) {
		if (scf_transaction_start(tx, pg) != 0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
			case SCF_ERROR_PERMISSION_DENIED:
			case SCF_ERROR_BACKEND_ACCESS:
				scferr();
				goto bail;

			case SCF_ERROR_DELETED:
				goto add_pg;

			case SCF_ERROR_HANDLE_MISMATCH:
			case SCF_ERROR_NOT_BOUND:
			case SCF_ERROR_IN_USE:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_transaction_start", scf_error());
			}
		}

		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
		switch (r) {
		case 0:
			break;

		case ECONNABORTED:
			scferr();
			goto bail;

		case ECANCELED:
			/* Property group vanished; drop the tx and re-add. */
			scf_transaction_reset(tx);
			goto add_pg;

		default:
			bad_error("transaction_add_set", r);
		}

		r = scf_entry_add_value(ent, val);
		assert(r == 0);

		r = scf_transaction_commit(tx);
		if (r == 1)
			break;

		if (r != 0) {
			switch (scf_error()) {
			case SCF_ERROR_CONNECTION_BROKEN:
			case SCF_ERROR_PERMISSION_DENIED:
			case SCF_ERROR_BACKEND_ACCESS:
			case SCF_ERROR_BACKEND_READONLY:
				scferr();
				goto bail;

			case SCF_ERROR_DELETED:
				scf_transaction_reset(tx);
				goto add_pg;

			case SCF_ERROR_INVALID_ARGUMENT:
			case SCF_ERROR_NOT_BOUND:
			case SCF_ERROR_NOT_SET:
			default:
				bad_error("scf_transaction_commit",
				    scf_error());
			}
		}

		/*
		 * Commit returned 0: nothing was committed (presumably the
		 * property group was out of date -- see libscf docs).  Reset
		 * the transaction, refresh pg and go around again.
		 */
		scf_transaction_reset(tx);
		(void) scf_pg_update(pg);
	}

	/* Common exit: release all libscf objects and the handle. */
bail:
	scf_transaction_destroy(tx);
	scf_entry_destroy(ent);
	scf_value_destroy(val);
	scf_pg_destroy(pg);
	scf_instance_destroy(inst);

	(void) scf_handle_unbind(h);
	scf_handle_destroy(h);
}
3523
3524/*
3525 * Function to handle requests from users to main init running as process 1.
3526 */
3527static void
3528userinit(int argc, char **argv)
3529{
3530	FILE	*fp;
3531	char	*ln;
3532	int	init_signal;
3533	struct stat	sconbuf, conbuf;
3534	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
3535
3536	/*
3537	 * We are a user invoked init.  Is there an argument and is it
3538	 * a single character?  If not, print usage message and quit.
3539	 */
3540	if (argc != 2 || argv[1][1] != '\0') {
3541		(void) fprintf(stderr, usage_msg);
3542		exit(0);
3543	}
3544
3545	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
3546		(void) fprintf(stderr, usage_msg);
3547		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
3548		    argv[1]);
3549		exit(1);
3550	}
3551
3552	if (init_signal == SINGLE_USER) {
3553		/*
3554		 * Make sure this process is talking to a legal tty line
3555		 * and that /dev/syscon is linked to this line.
3556		 */
3557		ln = ttyname(0);	/* Get the name of tty */
3558		if (ln == NULL) {
3559			(void) fprintf(stderr,
3560			    "Standard input not a tty line\n");
3561			(void) audit_put_record(ADT_FAILURE,
3562			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
3563			exit(1);
3564		}
3565
3566		if ((stat(ln, &sconbuf) != -1) &&
3567		    (stat(SYSCON, &conbuf) == -1 ||
3568		    sconbuf.st_rdev != conbuf.st_rdev)) {
3569			/*
3570			 * /dev/syscon needs to change.
3571			 * Unlink /dev/syscon and relink it to the current line.
3572			 */
3573			if (lstat(SYSCON, &conbuf) != -1 &&
3574			    unlink(SYSCON) == FAILURE) {
3575				perror("Can't unlink /dev/syscon");
3576				(void) fprintf(stderr,
3577				    "Run command on the system console.\n");
3578				(void) audit_put_record(ADT_FAILURE,
3579				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3580				exit(1);
3581			}
3582			if (symlink(ln, SYSCON) == FAILURE) {
3583				(void) fprintf(stderr,
3584				    "Can't symlink /dev/syscon to %s: %s", ln,
3585				    strerror(errno));
3586
3587				/* Try to leave a syscon */
3588				(void) link(SYSTTY, SYSCON);
3589				(void) audit_put_record(ADT_FAILURE,
3590				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
3591				exit(1);
3592			}
3593
3594			/*
3595			 * Try to leave a message on system console saying where
3596			 * /dev/syscon is currently connected.
3597			 */
3598			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
3599				(void) fprintf(fp,
3600				    "\n****	SYSCON CHANGED TO %s	****\n",
3601				    ln);
3602				(void) fclose(fp);
3603			}
3604		}
3605	}
3606
3607	update_boot_archive(init_signal);
3608
3609	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
3610
3611	/*
3612	 * Signal init; init will take care of telling svc.startd.
3613	 */
3614	if (kill(init_pid, init_signal) == FAILURE) {
3615		(void) fprintf(stderr, "Must be super-user\n");
3616		(void) audit_put_record(ADT_FAILURE,
3617		    ADT_FAIL_VALUE_AUTH, argv[1]);
3618		exit(1);
3619	}
3620
3621	exit(0);
3622}
3623
3624
3625#define	DELTA	25	/* Number of pidlist elements to allocate at a time */
3626
3627/* ARGSUSED */
3628void
3629sigpoll(int n)
3630{
3631	struct pidrec prec;
3632	struct pidrec *p = &prec;
3633	struct pidlist *plp;
3634	struct pidlist *tp, *savetp;
3635	int i;
3636
3637	if (Pfd < 0) {
3638		return;
3639	}
3640
3641	for (;;) {
3642		/*
3643		 * Important Note: Either read will really fail (in which case
3644		 * return is all we can do) or will get EAGAIN (Pfd was opened
3645		 * O_NDELAY), in which case we also want to return.
3646		 * Always return from here!
3647		 */
3648		if (read(Pfd, p, sizeof (struct pidrec)) !=
3649						sizeof (struct pidrec)) {
3650			return;
3651		}
3652		switch (p->pd_type) {
3653
3654		case ADDPID:
3655			/*
3656			 * New "godchild", add to list.
3657			 */
3658			if (Plfree == NULL) {
3659				plp = (struct pidlist *)calloc(DELTA,
3660				    sizeof (struct pidlist));
3661				if (plp == NULL) {
3662					/* Can't save pid */
3663					break;
3664				}
3665				/*
3666				 * Point at 2nd record allocated, we'll use plp.
3667				 */
3668				tp = plp + 1;
3669				/*
3670				 * Link them into a chain.
3671				 */
3672				Plfree = tp;
3673				for (i = 0; i < DELTA - 2; i++) {
3674					tp->pl_next = tp + 1;
3675					tp++;
3676				}
3677			} else {
3678				plp = Plfree;
3679				Plfree = plp->pl_next;
3680			}
3681			plp->pl_pid = p->pd_pid;
3682			plp->pl_dflag = 0;
3683			plp->pl_next = NULL;
3684			/*
3685			 * Note - pid list is kept in increasing order of pids.
3686			 */
3687			if (Plhead == NULL) {
3688				Plhead = plp;
3689				/* Back up to read next record */
3690				break;
3691			} else {
3692				savetp = tp = Plhead;
3693				while (tp) {
3694					if (plp->pl_pid > tp->pl_pid) {
3695						savetp = tp;
3696						tp = tp->pl_next;
3697						continue;
3698					} else if (plp->pl_pid < tp->pl_pid) {
3699						if (tp == Plhead) {
3700							plp->pl_next = Plhead;
3701							Plhead = plp;
3702						} else {
3703							plp->pl_next =
3704							    savetp->pl_next;
3705							savetp->pl_next = plp;
3706						}
3707						break;
3708					} else {
3709						/* Already in list! */
3710						plp->pl_next = Plfree;
3711						Plfree = plp;
3712						break;
3713					}
3714				}
3715				if (tp == NULL) {
3716					/* Add to end of list */
3717					savetp->pl_next = plp;
3718				}
3719			}
3720			/* Back up to read next record. */
3721			break;
3722
3723		case REMPID:
3724			/*
3725			 * This one was handled by someone else,
3726			 * purge it from the list.
3727			 */
3728			if (Plhead == NULL) {
3729				/* Back up to read next record. */
3730				break;
3731			}
3732			savetp = tp = Plhead;
3733			while (tp) {
3734				if (p->pd_pid > tp->pl_pid) {
3735					/* Keep on looking. */
3736					savetp = tp;
3737					tp = tp->pl_next;
3738					continue;
3739				} else if (p->pd_pid < tp->pl_pid) {
3740					/* Not in list. */
3741					break;
3742				} else {
3743					/* Found it. */
3744					if (tp == Plhead)
3745						Plhead = tp->pl_next;
3746					else
3747						savetp->pl_next = tp->pl_next;
3748					tp->pl_next = Plfree;
3749					Plfree = tp;
3750					break;
3751				}
3752			}
3753			/* Back up to read next record. */
3754			break;
3755		default:
3756			console(B_TRUE, "Bad message on initpipe\n");
3757			break;
3758		}
3759	}
3760}
3761
3762
3763static void
3764cleanaux()
3765{
3766	struct pidlist *savep, *p;
3767	pid_t	pid;
3768	short	status;
3769
3770	(void) sigset(SIGCLD, SIG_DFL);
3771	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
3772	(void) sighold(SIGPOLL);
3773	savep = p = Plhead;
3774	while (p) {
3775		if (p->pl_dflag) {
3776			/*
3777			 * Found an entry to delete,
3778			 * remove it from list first.
3779			 */
3780			pid = p->pl_pid;
3781			status = p->pl_exit;
3782			if (p == Plhead) {
3783				Plhead = p->pl_next;
3784				p->pl_next = Plfree;
3785				Plfree = p;
3786				savep = p = Plhead;
3787			} else {
3788				savep->pl_next = p->pl_next;
3789				p->pl_next = Plfree;
3790				Plfree = p;
3791				p = savep->pl_next;
3792			}
3793			clearent(pid, status);
3794			continue;
3795		}
3796		savep = p;
3797		p = p->pl_next;
3798	}
3799	(void) sigrelse(SIGPOLL);
3800	(void) sigset(SIGCLD, childeath);
3801}
3802
3803
3804/*
3805 * /etc/inittab has more entries and we have run out of room in the proc_table
3806 * array. Double the size of proc_table to accomodate the extra entries.
3807 */
3808static void
3809increase_proc_table_size()
3810{
3811	sigset_t block, unblock;
3812	void *ptr;
3813	size_t delta = num_proc * sizeof (struct PROC_TABLE);
3814
3815
3816	/*
3817	 * Block signals for realloc.
3818	 */
3819	(void) sigfillset(&block);
3820	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
3821
3822
3823	/*
3824	 * On failure we just return because callers of this function check
3825	 * for failure.
3826	 */
3827	do
3828		ptr = realloc(g_state, g_state_sz + delta);
3829	while (ptr == NULL && errno == EAGAIN);
3830
3831	if (ptr != NULL) {
3832		/* ensure that the new part is initialized to zero */
3833		bzero((caddr_t)ptr + g_state_sz, delta);
3834
3835		g_state = ptr;
3836		g_state_sz += delta;
3837		num_proc <<= 1;
3838	}
3839
3840
3841	/* unblock our signals before returning */
3842	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
3843}
3844
3845
3846
3847/*
3848 * Sanity check g_state.
3849 */
3850static int
3851st_sane()
3852{
3853	int i;
3854	struct PROC_TABLE *ptp;
3855
3856
3857	/* Note: cur_state is encoded as a signal number */
3858	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
3859		return (0);
3860
3861	/* Check num_proc */
3862	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
3863	    sizeof (struct PROC_TABLE))
3864		return (0);
3865
3866	/* Check proc_table */
3867	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
3868		/* skip unoccupied entries */
3869		if (!(ptp->p_flags & OCCUPIED))
3870			continue;
3871
3872		/* p_flags has no bits outside of PF_MASK */
3873		if (ptp->p_flags & ~(PF_MASK))
3874			return (0);
3875
3876		/* 5 <= pid <= MAXPID */
3877		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
3878			return (0);
3879
3880		/* p_count >= 0 */
3881		if (ptp->p_count < 0)
3882			return (0);
3883
3884		/* p_time >= 0 */
3885		if (ptp->p_time < 0)
3886			return (0);
3887	}
3888
3889	return (1);
3890}
3891
3892/*
3893 * Initialize our state.
3894 *
3895 * If the system just booted, then init_state_file, which is located on an
3896 * everpresent tmpfs filesystem, should not exist.
3897 *
3898 * If we were restarted, then init_state_file should exist, in
3899 * which case we'll read it in, sanity check it, and use it.
3900 *
3901 * Note: You can't call console() until proc_table is ready.
3902 */
3903void
3904st_init()
3905{
3906	struct stat stb;
3907	int ret, st_fd, insane = 0;
3908	size_t to_be_read;
3909	char *ptr;
3910
3911
3912	booting = 1;
3913
3914	do {
3915		/*
3916		 * If we can exclusively create the file, then we're the
3917		 * initial invocation of init(1M).
3918		 */
3919		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
3920		    S_IRUSR | S_IWUSR);
3921	} while (st_fd == -1 && errno == EINTR);
3922	if (st_fd != -1)
3923		goto new_state;
3924
3925	booting = 0;
3926
3927	do {
3928		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
3929	} while (st_fd == -1 && errno == EINTR);
3930	if (st_fd == -1)
3931		goto new_state;
3932
3933	/* Get the size of the file. */
3934	do
3935		ret = fstat(st_fd, &stb);
3936	while (ret == -1 && errno == EINTR);
3937	if (ret == -1)
3938		goto new_state;
3939
3940	do
3941		g_state = malloc(stb.st_size);
3942	while (g_state == NULL && errno == EAGAIN);
3943	if (g_state == NULL)
3944		goto new_state;
3945
3946	to_be_read = stb.st_size;
3947	ptr = (char *)g_state;
3948	while (to_be_read > 0) {
3949		ssize_t read_ret;
3950
3951		read_ret = read(st_fd, ptr, to_be_read);
3952		if (read_ret < 0) {
3953			if (errno == EINTR)
3954				continue;
3955
3956			goto new_state;
3957		}
3958
3959		to_be_read -= read_ret;
3960		ptr += read_ret;
3961	}
3962
3963	(void) close(st_fd);
3964
3965	g_state_sz = stb.st_size;
3966
3967	if (st_sane()) {
3968		console(B_TRUE, "Restarting.\n");
3969		return;
3970	}
3971
3972	insane = 1;
3973
3974new_state:
3975	if (st_fd >= 0)
3976		(void) close(st_fd);
3977	else
3978		(void) unlink(init_state_file);
3979
3980	if (g_state != NULL)
3981		free(g_state);
3982
3983	/* Something went wrong, so allocate new state. */
3984	g_state_sz = sizeof (struct init_state) +
3985	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
3986	do
3987		g_state = calloc(1, g_state_sz);
3988	while (g_state == NULL && errno == EAGAIN);
3989	if (g_state == NULL) {
3990		/* Fatal error! */
3991		exit(errno);
3992	}
3993
3994	g_state->ist_runlevel = -1;
3995	num_proc = init_num_proc;
3996
3997	if (!booting) {
3998		console(B_TRUE, "Restarting.\n");
3999
4000		/* Overwrite the bad state file. */
4001		st_write();
4002
4003		if (!insane) {
4004			console(B_TRUE,
4005			    "Error accessing persistent state file `%s'.  "
4006			    "Ignored.\n", init_state_file);
4007		} else {
4008			console(B_TRUE,
4009			    "Persistent state file `%s' is invalid and was "
4010			    "ignored.\n", init_state_file);
4011		}
4012	}
4013}
4014
4015/*
4016 * Write g_state out to the state file.
4017 */
4018void
4019st_write()
4020{
4021	static int complained = 0;
4022
4023	int st_fd;
4024	char *cp;
4025	size_t sz;
4026	ssize_t ret;
4027
4028
4029	do {
4030		st_fd = open(init_next_state_file,
4031		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
4032	} while (st_fd < 0 && errno == EINTR);
4033	if (st_fd < 0)
4034		goto err;
4035
4036	cp = (char *)g_state;
4037	sz = g_state_sz;
4038	while (sz > 0) {
4039		ret = write(st_fd, cp, sz);
4040		if (ret < 0) {
4041			if (errno == EINTR)
4042				continue;
4043
4044			goto err;
4045		}
4046
4047		sz -= ret;
4048		cp += ret;
4049	}
4050
4051	(void) close(st_fd);
4052	st_fd = -1;
4053	if (rename(init_next_state_file, init_state_file)) {
4054		(void) unlink(init_next_state_file);
4055		goto err;
4056	}
4057	complained = 0;
4058
4059	return;
4060
4061err:
4062	if (st_fd >= 0)
4063		(void) close(st_fd);
4064
4065	if (!booting && !complained) {
4066		/*
4067		 * Only complain after the filesystem should have come up.
4068		 * And only do it once so we don't loop between console()
4069		 * & efork().
4070		 */
4071		complained = 1;
4072		if (st_fd)
4073			console(B_TRUE, "Couldn't write persistent state "
4074			    "file `%s'.\n", init_state_file);
4075		else
4076			console(B_TRUE, "Couldn't move persistent state "
4077			    "file `%s' to `%s'.\n", init_next_state_file,
4078			    init_state_file);
4079	}
4080}
4081
4082/*
4083 * Create a contract with these parameters.
4084 */
4085static int
4086contract_make_template(uint_t info, uint_t critical, uint_t fatal,
4087    uint64_t cookie)
4088{
4089	int fd, err;
4090
4091	char *ioctl_tset_emsg =
4092	    "Couldn't set \"%s\" contract template parameter: %s.\n";
4093
4094	do
4095		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
4096	while (fd < 0 && errno == EINTR);
4097	if (fd < 0) {
4098		console(B_TRUE, "Couldn't create process template: %s.\n",
4099		    strerror(errno));
4100		return (-1);
4101	}
4102
4103	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
4104		console(B_TRUE, "Contract set template inherit, regent "
4105		    "failed: %s.\n", strerror(err));
4106
4107	/*
4108	 * These errors result in a misconfigured template, which is better
4109	 * than no template at all, so warn but don't abort.
4110	 */
4111	if (err = ct_tmpl_set_informative(fd, info))
4112		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
4113
4114	if (err = ct_tmpl_set_critical(fd, critical))
4115		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
4116
4117	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
4118		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
4119
4120	if (err = ct_tmpl_set_cookie(fd, cookie))
4121		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
4122
4123	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4124
4125	return (fd);
4126}
4127
4128/*
4129 * Create the templates and open an event file descriptor.  We use dup2(2) to
4130 * get these descriptors away from the stdin/stdout/stderr group.
4131 */
4132static void
4133contracts_init()
4134{
4135	int err, fd;
4136
4137	/*
4138	 * Create & configure a legacy template.  We only want empty events so
4139	 * we know when to abandon them.
4140	 */
4141	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
4142	    ORDINARY_COOKIE);
4143	if (legacy_tmpl >= 0) {
4144		err = ct_tmpl_activate(legacy_tmpl);
4145		if (err != 0) {
4146			(void) close(legacy_tmpl);
4147			legacy_tmpl = -1;
4148			console(B_TRUE,
4149			    "Couldn't activate legacy template (%s); "
4150			    "legacy services will be in init's contract.\n",
4151			    strerror(err));
4152		}
4153	} else
4154		console(B_TRUE,
4155		    "Legacy services will be in init's contract.\n");
4156
4157	if (dup2(legacy_tmpl, 255) == -1) {
4158		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
4159		    strerror(errno));
4160	} else {
4161		(void) close(legacy_tmpl);
4162		legacy_tmpl = 255;
4163	}
4164
4165	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
4166
4167	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4168	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
4169
4170	if (dup2(startd_tmpl, 254) == -1) {
4171		console(B_TRUE, "Could not duplicate startd template: %s.\n",
4172		    strerror(errno));
4173	} else {
4174		(void) close(startd_tmpl);
4175		startd_tmpl = 254;
4176	}
4177
4178	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
4179
4180	if (legacy_tmpl < 0 && startd_tmpl < 0) {
4181		/* The creation errors have already been reported. */
4182		console(B_TRUE,
4183		    "Ignoring contract events.  Core smf(5) services will not "
4184		    "be restarted.\n");
4185		return;
4186	}
4187
4188	/*
4189	 * Open an event endpoint.
4190	 */
4191	do
4192		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
4193	while (fd < 0 && errno == EINTR);
4194	if (fd < 0) {
4195		console(B_TRUE,
4196		    "Couldn't open process pbundle: %s.  Core smf(5) services "
4197		    "will not be restarted.\n", strerror(errno));
4198		return;
4199	}
4200
4201	if (dup2(fd, 253) == -1) {
4202		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
4203		    strerror(errno));
4204	} else {
4205		(void) close(fd);
4206		fd = 253;
4207	}
4208
4209	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
4210
4211	/* Reset in case we've been restarted. */
4212	(void) ct_event_reset(fd);
4213
4214	poll_fds[0].fd = fd;
4215	poll_fds[0].events = POLLIN;
4216	poll_nfds = 1;
4217}
4218
4219static int
4220contract_getfile(ctid_t id, const char *name, int oflag)
4221{
4222	int fd;
4223
4224	do
4225		fd = contract_open(id, "process", name, oflag);
4226	while (fd < 0 && errno == EINTR);
4227
4228	if (fd < 0)
4229		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
4230		    name, id, strerror(errno));
4231
4232	return (fd);
4233}
4234
4235static int
4236contract_cookie(ctid_t id, uint64_t *cp)
4237{
4238	int fd, err;
4239	ct_stathdl_t sh;
4240
4241	fd = contract_getfile(id, "status", O_RDONLY);
4242	if (fd < 0)
4243		return (-1);
4244
4245	err = ct_status_read(fd, CTD_COMMON, &sh);
4246	if (err != 0) {
4247		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
4248		    id, strerror(err));
4249		(void) close(fd);
4250		return (-1);
4251	}
4252
4253	(void) close(fd);
4254
4255	*cp = ct_status_get_cookie(sh);
4256
4257	ct_status_free(sh);
4258	return (0);
4259}
4260
4261static void
4262contract_ack(ct_evthdl_t e)
4263{
4264	int fd;
4265
4266	if (ct_event_get_flags(e) & CTE_INFO)
4267		return;
4268
4269	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
4270	if (fd < 0)
4271		return;
4272
4273	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
4274	(void) close(fd);
4275}
4276
4277/*
4278 * Process a contract event.
4279 */
4280static void
4281contract_event(struct pollfd *poll)
4282{
4283	ct_evthdl_t e;
4284	int err;
4285	ctid_t ctid;
4286
4287	if (!(poll->revents & POLLIN)) {
4288		if (poll->revents & POLLERR)
4289			console(B_TRUE,
4290			    "Unknown poll error on my process contract "
4291			    "pbundle.\n");
4292		return;
4293	}
4294
4295	err = ct_event_read(poll->fd, &e);
4296	if (err != 0) {
4297		console(B_TRUE, "Error retrieving contract event: %s.\n",
4298		    strerror(err));
4299		return;
4300	}
4301
4302	ctid = ct_event_get_ctid(e);
4303
4304	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
4305		uint64_t cookie;
4306		int ret, abandon = 1;
4307
4308		/* If it's svc.startd, restart it.  Else, abandon. */
4309		ret = contract_cookie(ctid, &cookie);
4310
4311		if (ret == 0) {
4312			if (cookie == STARTD_COOKIE &&
4313			    do_restart_startd) {
4314				if (smf_debug)
4315					console(B_TRUE, "Restarting "
4316					    "svc.startd.\n");
4317
4318				/*
4319				 * Account for the failure.  If the failure rate
4320				 * exceeds a threshold, then drop to maintenance
4321				 * mode.
4322				 */
4323				startd_record_failure();
4324				if (startd_failure_rate_critical())
4325					enter_maintenance();
4326
4327				if (startd_tmpl < 0)
4328					console(B_TRUE,
4329					    "Restarting svc.startd in "
4330					    "improper contract (bad "
4331					    "template).\n");
4332
4333				(void) startd_run(startd_cline, startd_tmpl,
4334				    ctid);
4335
4336				abandon = 0;
4337			}
4338		}
4339
4340		if (abandon && (err = contract_abandon_id(ctid))) {
4341			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
4342			    ctid, strerror(err));
4343		}
4344
4345		/*
4346		 * No need to acknowledge the event since either way the
4347		 * originating contract should be abandoned.
4348		 */
4349	} else {
4350		console(B_TRUE,
4351		    "Received contract event of unexpected type %d from "
4352		    "contract %ld.\n", ct_event_get_type(e), ctid);
4353
4354		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
4355			/* Allow unexpected critical events to be released. */
4356			contract_ack(e);
4357	}
4358
4359	ct_event_free(e);
4360}
4361
4362/*
4363 * svc.startd(1M) Management
4364 */
4365
4366/*
4367 * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
4368 * contract, or 0 if we're starting it for the first time.  If wait is true
4369 * we'll wait for and return the exit value of the child.
4370 */
4371static int
4372startd_run(const char *cline, int tmpl, ctid_t old_ctid)
4373{
4374	int err, i, ret, did_activate;
4375	pid_t pid;
4376	struct stat sb;
4377
4378	if (cline[0] == '\0')
4379		return (-1);
4380
4381	/*
4382	 * Don't restart startd if the system is rebooting or shutting down.
4383	 */
4384	do {
4385		ret = stat("/etc/svc/volatile/resetting", &sb);
4386	} while (ret == -1 && errno == EINTR);
4387
4388	if (ret == 0) {
4389		if (smf_debug)
4390			console(B_TRUE, "Quiescing for reboot.\n");
4391		(void) pause();
4392		return (-1);
4393	}
4394
4395	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4396	if (err == EINVAL) {
4397		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
4398		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
4399		    CT_PR_EV_HWERR, STARTD_COOKIE);
4400
4401		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
4402	}
4403	if (err != 0) {
4404		console(B_TRUE,
4405		    "Couldn't set transfer parameter of contract template: "
4406		    "%s.\n", strerror(err));
4407	}
4408
4409	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
4410	    SCF_SERVICE_STARTD)) != 0)
4411		console(B_TRUE,
4412		    "Can not set svc_fmri in contract template: %s\n",
4413		    strerror(err));
4414	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
4415	    startd_svc_aux)) != 0)
4416		console(B_TRUE,
4417		    "Can not set svc_aux in contract template: %s\n",
4418		    strerror(err));
4419	did_activate = !(ct_tmpl_activate(tmpl));
4420	if (!did_activate)
4421		console(B_TRUE,
4422		    "Template activation failed; not starting \"%s\" in "
4423		    "proper contract.\n", cline);
4424
4425	/* Hold SIGCHLD so we can wait if necessary. */
4426	(void) sighold(SIGCHLD);
4427
4428	while ((pid = fork()) < 0) {
4429		if (errno == EPERM) {
4430			console(B_TRUE, "Insufficient permission to fork.\n");
4431
4432			/* Now that's a doozy. */
4433			exit(1);
4434		}
4435
4436		console(B_TRUE,
4437		    "fork() for svc.startd failed: %s.  Will retry in 1 "
4438		    "second...\n", strerror(errno));
4439
4440		(void) sleep(1);
4441
4442		/* Eventually give up? */
4443	}
4444
4445	if (pid == 0) {
4446		/* child */
4447
4448		/* See the comment in efork() */
4449		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
4450			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
4451				(void) sigset(i, SIG_IGN);
4452			else
4453				(void) sigset(i, SIG_DFL);
4454		}
4455
4456		if (smf_options != NULL) {
4457			/* Put smf_options in the environment. */
4458			glob_envp[glob_envn] =
4459			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
4460				strlen(smf_options) + 1);
4461
4462			if (glob_envp[glob_envn] != NULL) {
4463				/* LINTED */
4464				(void) sprintf(glob_envp[glob_envn],
4465				    "SMF_OPTIONS=%s", smf_options);
4466				glob_envp[glob_envn+1] = NULL;
4467			} else {
4468				console(B_TRUE,
4469				    "Could not set SMF_OPTIONS (%s).\n",
4470				    strerror(errno));
4471			}
4472		}
4473
4474		if (smf_debug)
4475			console(B_TRUE, "Executing svc.startd\n");
4476
4477		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
4478
4479		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
4480		    strerror(errno));
4481
4482		exit(1);
4483	}
4484
4485	/* parent */
4486
4487	if (did_activate) {
4488		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
4489			(void) ct_tmpl_clear(tmpl);
4490	}
4491
4492	/* Clear the old_ctid reference so the kernel can reclaim it. */
4493	if (old_ctid != 0)
4494		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
4495
4496	(void) sigrelse(SIGCHLD);
4497
4498	return (0);
4499}
4500
4501/*
4502 * void startd_record_failure(void)
4503 *   Place the current time in our circular array of svc.startd failures.
4504 */
4505void
4506startd_record_failure()
4507{
4508	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
4509
4510	startd_failure_time[index] = gethrtime();
4511}
4512
4513/*
4514 * int startd_failure_rate_critical(void)
4515 *   Return true if the average failure interval is less than the permitted
4516 *   interval.  Implicit success if insufficient measurements for an average
4517 *   exist.
4518 */
4519int
4520startd_failure_rate_critical()
4521{
4522	int n = startd_failure_index;
4523	hrtime_t avg_ns = 0;
4524
4525	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
4526		return (0);
4527
4528	avg_ns =
4529	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
4530	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
4531	    NSTARTD_FAILURE_TIMES;
4532
4533	return (avg_ns < STARTD_FAILURE_RATE_NS);
4534}
4535
4536/*
4537 * returns string that must be free'd
4538 */
4539
4540static char
4541*audit_boot_msg()
4542{
4543	char		*b, *p;
4544	char		desc[] = "booted";
4545	zoneid_t	zid = getzoneid();
4546
4547	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
4548	if (b == NULL)
4549		return (b);
4550
4551	p = b;
4552	p += strlcpy(p, desc, sizeof (desc));
4553	if (zid != GLOBAL_ZONEID) {
4554		p += strlcpy(p, ": ", 3);
4555		(void) getzonenamebyid(zid, p, MAXNAMELEN);
4556	}
4557	return (b);
4558}
4559
4560/*
4561 * Generate AUE_init_solaris audit record.  Return 1 if
4562 * auditing is enabled in case the caller cares.
4563 *
4564 * In the case of userint() or a local zone invocation of
4565 * one_true_init, the process initially contains the audit
4566 * characteristics of the process that invoked init.  The first pass
4567 * through here uses those characteristics then for the case of
4568 * one_true_init in a local zone, clears them so subsequent system
4569 * state changes won't be attributed to the person who booted the
4570 * zone.
4571 */
4572static int
4573audit_put_record(int pass_fail, int status, char *msg)
4574{
4575	adt_session_data_t	*ah;
4576	adt_event_data_t	*event;
4577
4578	if (!adt_audit_enabled())
4579		return (0);
4580
4581	/*
4582	 * the PROC_DATA picks up the context to tell whether this is
4583	 * an attributed record (auid = -2 is unattributed)
4584	 */
4585	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
4586		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4587		return (1);
4588	}
4589	event = adt_alloc_event(ah, ADT_init_solaris);
4590	if (event == NULL) {
4591		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4592		(void) adt_end_session(ah);
4593		return (1);
4594	}
4595	event->adt_init_solaris.info = msg;	/* NULL is ok here */
4596
4597	if (adt_put_event(event, pass_fail, status)) {
4598		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
4599		(void) adt_end_session(ah);
4600		return (1);
4601	}
4602	adt_free_event(event);
4603
4604	(void) adt_end_session(ah);
4605
4606	return (1);
4607}
4608