fork.c revision 11996:91b62f7b8186
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * fork.c - safe forking for svc.startd
28 *
29 * fork_configd() and fork_sulogin() are related, special cases that handle the
30 * spawning of specific client processes for svc.startd.
31 */
32
33#include <sys/contract/process.h>
34#include <sys/corectl.h>
35#include <sys/ctfs.h>
36#include <sys/stat.h>
37#include <sys/types.h>
38#include <sys/uio.h>
39#include <sys/wait.h>
40#include <assert.h>
41#include <errno.h>
42#include <fcntl.h>
43#include <libcontract.h>
44#include <libcontract_priv.h>
45#include <libscf_priv.h>
46#include <limits.h>
47#include <poll.h>
48#include <port.h>
49#include <signal.h>
50#include <stdarg.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <utmpx.h>
56#include <spawn.h>
57
58#include "manifest_hash.h"
59#include "configd_exit.h"
60#include "protocol.h"
61#include "startd.h"
62
/*
 * Cursor over the utmpx database; assigned from getutxent() while
 * fork_sulogin() searches for the session on the console line.
 */
static	struct	utmpx	*utmpp;	/* pointer for getutxent() */
64
/*
 * pid_t startd_fork1(int *forkerr)
 *   fork1() bracketed by the wait_prefork()/wait_postfork() hooks.  Returns
 *   whatever fork1() returned.  When the fork fails and forkerr is non-NULL,
 *   the errno left by fork1() is stored through it.
 */
pid_t
startd_fork1(int *forkerr)
{
	pid_t child;

	/* Run the pre-fork hook before creating the new process. */
	wait_prefork();

	child = fork1();
	if (child == -1) {
		if (forkerr != NULL)
			*forkerr = errno;
	}

	/* The post-fork hook is handed the fork1() result. */
	wait_postfork(child);

	return (child);
}
87
88/*
89 * void fork_mount(char *, char *)
90 *   Run mount(1M) with the given options and mount point.  (mount(1M) has much
91 *   hidden knowledge; it's much less correct to reimplement that logic here to
92 *   save a fork(2)/exec(2) invocation.)
93 */
94int
95fork_mount(char *path, char *opts)
96{
97	pid_t pid;
98	uint_t tries = 0;
99	int status;
100
101	for (pid = fork1(); pid == -1; pid = fork1()) {
102		if (++tries > MAX_MOUNT_RETRIES)
103			return (-1);
104
105		(void) sleep(tries);
106	}
107
108	if (pid != 0) {
109		(void) waitpid(pid, &status, 0);
110
111		/*
112		 * If our mount(1M) invocation exited by peculiar means, or with
113		 * a non-zero status, our mount likelihood is low.
114		 */
115		if (!WIFEXITED(status) ||
116		    WEXITSTATUS(status) != 0)
117			return (-1);
118
119		return (0);
120	}
121
122	(void) execl("/sbin/mount", "mount", "-o", opts, path, NULL);
123
124	return (-1);
125}
126
127/*
128 * pid_t fork_common(...)
129 *   Common routine used by fork_sulogin, fork_emi, and fork_configd to
130 *   fork a process in a contract with the provided terms.  Invokes
131 *   fork_sulogin (with its no-fork argument set) on errors.
132 */
133static pid_t
134fork_common(const char *name, const char *svc_fmri, int retries, ctid_t *ctidp,
135    uint_t inf, uint_t crit, uint_t fatal, uint_t param, uint64_t cookie)
136{
137	uint_t tries = 0;
138	int ctfd, err;
139	pid_t pid;
140
141	/*
142	 * Establish process contract terms.
143	 */
144	if ((ctfd = open64(CTFS_ROOT "/process/template", O_RDWR)) == -1) {
145		fork_sulogin(B_TRUE, "Could not open process contract template "
146		    "for %s: %s\n", name, strerror(errno));
147		/* NOTREACHED */
148	}
149
150	err = ct_tmpl_set_critical(ctfd, crit);
151	err |= ct_pr_tmpl_set_fatal(ctfd, fatal);
152	err |= ct_tmpl_set_informative(ctfd, inf);
153	err |= ct_pr_tmpl_set_param(ctfd, param);
154	err |= ct_tmpl_set_cookie(ctfd, cookie);
155	err |= ct_pr_tmpl_set_svc_fmri(ctfd, svc_fmri);
156	err |= ct_pr_tmpl_set_svc_aux(ctfd, name);
157	if (err) {
158		(void) close(ctfd);
159		fork_sulogin(B_TRUE, "Could not set %s process contract "
160		    "terms\n", name);
161		/* NOTREACHED */
162	}
163
164	if (err = ct_tmpl_activate(ctfd)) {
165		(void) close(ctfd);
166		fork_sulogin(B_TRUE, "Could not activate %s process contract "
167		    "template: %s\n", name, strerror(err));
168		/* NOTREACHED */
169	}
170
171	/*
172	 * Attempt to fork "retries" times.
173	 */
174	for (pid = fork1(); pid == -1; pid = fork1()) {
175		if (++tries > retries) {
176			/*
177			 * When we exit the sulogin session, init(1M)
178			 * will restart svc.startd(1M).
179			 */
180			err = errno;
181			(void) ct_tmpl_clear(ctfd);
182			(void) close(ctfd);
183			fork_sulogin(B_TRUE, "Could not fork to start %s: %s\n",
184			    name, strerror(err));
185			/* NOTREACHED */
186		}
187		(void) sleep(tries);
188	}
189
190	/*
191	 * Clean up, return pid and ctid.
192	 */
193	if (pid != 0 && (errno = contract_latest(ctidp)) != 0)
194		uu_die("Could not get new contract id for %s\n", name);
195	(void) ct_tmpl_clear(ctfd);
196	(void) close(ctfd);
197
198	return (pid);
199}
200
201/*
202 * void fork_sulogin(boolean_t, const char *, ...)
203 *   When we are invoked with the -s flag from boot (or run into an unfixable
204 *   situation), we run a private copy of sulogin.  When the sulogin session
205 *   is ended, we continue.  This is the last fallback action for system
206 *   maintenance.
207 *
208 *   If immediate is true, fork_sulogin() executes sulogin(1M) directly, without
209 *   forking.
210 *
211 *   Because fork_sulogin() is needed potentially before we daemonize, we leave
212 *   it outside the wait_register() framework.
213 */
214/*PRINTFLIKE2*/
215void
216fork_sulogin(boolean_t immediate, const char *format, ...)
217{
218	va_list args;
219	int fd_console;
220
221	(void) printf("Requesting System Maintenance Mode\n");
222
223	if (!booting_to_single_user)
224		(void) printf("(See /lib/svc/share/README for more "
225		    "information.)\n");
226
227	va_start(args, format);
228	(void) vprintf(format, args);
229	va_end(args);
230
231	if (!immediate) {
232		ctid_t	ctid;
233		pid_t	pid;
234
235		pid = fork_common("sulogin", SVC_SULOGIN_FMRI,
236		    MAX_SULOGIN_RETRIES, &ctid, CT_PR_EV_HWERR, 0,
237		    CT_PR_EV_HWERR, CT_PR_PGRPONLY, SULOGIN_COOKIE);
238
239		if (pid != 0) {
240			(void) waitpid(pid, NULL, 0);
241			contract_abandon(ctid);
242			return;
243		}
244		/* close all inherited fds */
245		closefrom(0);
246	} else {
247		(void) printf("Directly executing sulogin.\n");
248		/*
249		 * Can't call closefrom() in this MT section
250		 * so safely close a minimum set of fds.
251		 */
252		(void) close(STDIN_FILENO);
253		(void) close(STDOUT_FILENO);
254		(void) close(STDERR_FILENO);
255	}
256
257	(void) setpgrp();
258
259	/* open the console for sulogin */
260	if ((fd_console = open("/dev/console", O_RDWR)) >= 0) {
261		if (fd_console != STDIN_FILENO)
262			while (dup2(fd_console, STDIN_FILENO) < 0 &&
263			    errno == EINTR)
264				;
265		if (fd_console != STDOUT_FILENO)
266			while (dup2(fd_console, STDOUT_FILENO) < 0 &&
267			    errno == EINTR)
268				;
269		if (fd_console != STDERR_FILENO)
270			while (dup2(fd_console, STDERR_FILENO) < 0 &&
271			    errno == EINTR)
272				;
273		if (fd_console > STDERR_FILENO)
274			(void) close(fd_console);
275	}
276
277	setutxent();
278	while ((utmpp = getutxent()) != NULL) {
279		if (strcmp(utmpp->ut_user, "LOGIN") != 0) {
280			if (strcmp(utmpp->ut_line, "console") == 0) {
281				(void) kill(utmpp->ut_pid, 9);
282				break;
283			}
284		}
285	}
286
287	(void) execl("/sbin/sulogin", "sulogin", NULL);
288
289	uu_warn("Could not exec() sulogin");
290
291	exit(1);
292}
293
294#define	CONFIGD_PATH	"/lib/svc/bin/svc.configd"
295
/*
 * void fork_configd(int exitstatus)
 *   Start svc.configd through fork_common() and wait for the forked process
 *   to finish.  Unless a retry is taken, the parent then sets
 *   st_configd_lives, broadcasts st_configd_live_cv, and returns; the child
 *   execs CONFIGD_PATH.
 *
 *   exitstatus is the wait status of a previous svc.configd, or 0 when there
 *   was none (see the call in fork_configd_thread()).  If it carries
 *   CONFIGD_EXIT_DATABASE_BAD, a sulogin session is run before the start is
 *   attempted.
 *
 *   We are interested in exit events (since the parent's exiting means configd
 *   is ready to run and since the child's exiting indicates an error case) and
 *   in empty events.  This means we have a unique template for initiating
 *   configd.
 */
void
fork_configd(int exitstatus)
{
	pid_t pid;
	ctid_t ctid = -1;
	int err;
	char path[PATH_MAX];

	/*
	 * Checking the exitstatus for the potential failure of the
	 * daemonized svc.configd.  If this is not the first time
	 * through, but a call from the svc.configd monitoring thread
	 * after a failure this is the status that is expected.  Other
	 * failures are exposed during initialization or are fixed
	 * by a restart (e.g door closings).
	 *
	 * If this is on-disk database corruption it will also be
	 * caught by a restart but could be cleared before the restart.
	 *
	 * Or this could be internal database corruption due to a
	 * rogue service that needs to be cleared before restart.
	 */
	if (WEXITSTATUS(exitstatus) == CONFIGD_EXIT_DATABASE_BAD) {
		fork_sulogin(B_FALSE, "svc.configd exited with database "
		    "corrupt error after initialization of the repository\n");
	}

retry:
	log_framework(LOG_DEBUG, "fork_configd trying to start svc.configd\n");

	/*
	 * If we're retrying, we will have an old contract lying around
	 * from the failure.  Since we're going to be creating a new
	 * contract shortly, we abandon the old one now.
	 */
	if (ctid != -1)
		contract_abandon(ctid);
	ctid = -1;

	pid = fork_common("svc.configd", SCF_SERVICE_CONFIGD,
	    MAX_CONFIGD_RETRIES, &ctid, 0, CT_PR_EV_EXIT, 0,
	    CT_PR_INHERIT | CT_PR_REGENT, CONFIGD_COOKIE);

	if (pid != 0) {
		/*
		 * Parent.  Note that this exitstatus shadows the function
		 * parameter: from here on it is the wait status of the
		 * process we just forked.
		 */
		int exitstatus;

		/*
		 * Record the forked pid; fork_configd_thread() compares exit
		 * events against it.
		 */
		st->st_configd_pid = pid;

		if (waitpid(pid, &exitstatus, 0) == -1) {
			/*
			 * After the sulogin session we fall through to the
			 * "configd is live" broadcast below; no retry is
			 * attempted on this path.
			 */
			fork_sulogin(B_FALSE, "waitpid on svc.configd "
			    "failed: %s\n", strerror(errno));
		} else if (WIFEXITED(exitstatus)) {
			char *errstr;

			/*
			 * Examine exitstatus.  This will eventually get more
			 * complicated, as we will want to teach startd how to
			 * invoke configd with alternate repositories, etc.
			 *
			 * Note that exec(2) failure results in an exit status
			 * of 1, resulting in the default clause below.
			 */

			/*
			 * Assign readable strings to cases we don't handle, or
			 * have error outcomes that cannot be eliminated.
			 */
			switch (WEXITSTATUS(exitstatus)) {
			case CONFIGD_EXIT_BAD_ARGS:
				errstr = "bad arguments";
				break;

			case CONFIGD_EXIT_DATABASE_BAD:
				errstr = "database corrupt";
				break;

			case CONFIGD_EXIT_DATABASE_LOCKED:
				errstr = "database locked";
				break;
			case CONFIGD_EXIT_INIT_FAILED:
				errstr = "initialization failure";
				break;
			case CONFIGD_EXIT_DOOR_INIT_FAILED:
				errstr = "door initialization failure";
				break;
			case CONFIGD_EXIT_DATABASE_INIT_FAILED:
				errstr = "database initialization failure";
				break;
			case CONFIGD_EXIT_NO_THREADS:
				errstr = "no threads available";
				break;
			case CONFIGD_EXIT_LOST_MAIN_DOOR:
				errstr = "lost door server attachment";
				break;
			case 1:
				errstr = "execution failure";
				break;
			default:
				errstr = "unknown error";
				break;
			}

			/*
			 * Remedial actions for various configd failures.
			 */
			switch (WEXITSTATUS(exitstatus)) {
			case CONFIGD_EXIT_OKAY:
				break;

			case CONFIGD_EXIT_DATABASE_LOCKED:
				/* attempt remount of / read-write */
				if (fs_is_read_only("/", NULL) == 1) {
					if (fs_remount("/") == -1)
						fork_sulogin(B_FALSE,
						    "remount of root "
						    "filesystem failed\n");

					goto retry;
				}
				/*
				 * Root was not reported read-only: no retry,
				 * we proceed to the broadcast below.
				 */
				break;

			default:
				/* Everything else: maintenance, then retry. */
				fork_sulogin(B_FALSE, "svc.configd exited "
				    "with status %d (%s)\n",
				    WEXITSTATUS(exitstatus), errstr);
				goto retry;
			}
		} else if (WIFSIGNALED(exitstatus)) {
			char signame[SIG2STR_MAX];

			/* Fall back to the number if the signal has no name. */
			if (sig2str(WTERMSIG(exitstatus), signame))
				(void) snprintf(signame, SIG2STR_MAX,
				    "signum %d", WTERMSIG(exitstatus));

			fork_sulogin(B_FALSE, "svc.configd signalled:"
			    " %s\n", signame);

			goto retry;
		} else {
			fork_sulogin(B_FALSE, "svc.configd non-exit "
			    "condition: 0x%x\n", exitstatus);

			goto retry;
		}

		/*
		 * Announce that we have a valid svc.configd status.
		 */
		MUTEX_LOCK(&st->st_configd_live_lock);
		st->st_configd_lives = 1;
		err = pthread_cond_broadcast(&st->st_configd_live_cv);
		assert(err == 0);
		MUTEX_UNLOCK(&st->st_configd_live_lock);

		log_framework(LOG_DEBUG, "fork_configd broadcasts configd is "
		    "live\n");
		return;
	}

	/* child */

	/*
	 * Set our per-process core file path to leave core files in
	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
	 * The doubled %% leaves a literal "%p" in path for the core file
	 * name pattern.
	 */
	(void) snprintf(path, sizeof (path),
	    "/etc/svc/volatile/core.configd.%%p");

	(void) core_set_process_path(path, strlen(path) + 1, getpid());

	log_framework(LOG_DEBUG, "executing svc.configd\n");

	(void) execl(CONFIGD_PATH, CONFIGD_PATH, NULL);

	/*
	 * Status code is used above to identify configd exec failure.
	 */
	exit(1);
}
480
/*
 * void *fork_configd_thread(void *vctid)
 *   Thread body that brings up svc.configd and then watches process contract
 *   events for it indefinitely.
 *
 *   vctid carries a ctid_t cast to a pointer.  If it is -1, svc.configd is
 *   started with fork_configd(0).  Otherwise an existing svc.configd in that
 *   contract is accepted and waiters on st_configd_live_cv are woken
 *   directly.
 *
 *   The thread then reads critical events from the process contract bundle.
 *   Events whose contract cookie is not CONFIGD_COOKIE are dropped.  For an
 *   exit event from a pid other than st_configd_pid, the contract is
 *   abandoned and svc.configd is restarted via fork_configd().  Processed
 *   events are acknowledged.  The loop never terminates.
 */
void *
fork_configd_thread(void *vctid)
{
	int fd, err;
	ctid_t configd_ctid = (ctid_t)vctid;

	if (configd_ctid == -1) {
		log_framework(LOG_DEBUG,
		    "fork_configd_thread starting svc.configd\n");
		fork_configd(0);
	} else {
		/*
		 * configd_ctid is known:  we broadcast and continue.
		 * test contract for appropriate state by verifying that
		 * there is one or more processes within it?
		 */
		/* NOTE(review): %ld assumes ctid_t is long-sized -- confirm. */
		log_framework(LOG_DEBUG,
		    "fork_configd_thread accepting svc.configd with CTID %ld\n",
		    configd_ctid);
		MUTEX_LOCK(&st->st_configd_live_lock);
		st->st_configd_lives = 1;
		(void) pthread_cond_broadcast(&st->st_configd_live_cv);
		MUTEX_UNLOCK(&st->st_configd_live_lock);
	}

	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
	if (fd == -1)
		uu_die("process bundle open failed");

	/*
	 * Make sure we get all events (including those generated by configd
	 * before this thread was started).
	 */
	err = ct_event_reset(fd);
	assert(err == 0);

	for (;;) {
		int efd, sfd;
		ct_evthdl_t ev;
		uint32_t type;
		ctevid_t evid;
		ct_stathdl_t status;
		ctid_t ctid;
		uint64_t cookie;
		pid_t pid;

		/*
		 * NOTE(review): a read error is logged and retried at once
		 * with no delay; a persistent error would presumably spin
		 * here -- confirm that cannot happen.
		 */
		if (err = ct_event_read_critical(fd, &ev)) {
			assert(err != EINVAL && err != EAGAIN);
			log_error(LOG_WARNING,
			    "Error reading next contract event: %s",
			    strerror(err));
			continue;
		}

		evid = ct_event_get_evid(ev);
		ctid = ct_event_get_ctid(ev);
		type = ct_event_get_type(ev);

		/*
		 * Fetch cookie.  If the contract's status cannot be opened or
		 * read, the event is freed without being acknowledged.
		 */
		sfd = contract_open(ctid, "process", "status", O_RDONLY);
		if (sfd < 0) {
			ct_event_free(ev);
			continue;
		}

		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
			log_framework(LOG_WARNING, "Could not get status for "
			    "contract %ld: %s\n", ctid, strerror(err));

			ct_event_free(ev);
			startd_close(sfd);
			continue;
		}

		cookie = ct_status_get_cookie(status);

		ct_status_free(status);

		startd_close(sfd);

		/*
		 * Don't process events from contracts we aren't interested in.
		 */
		if (cookie != CONFIGD_COOKIE) {
			ct_event_free(ev);
			continue;
		}

		if (type == CT_PR_EV_EXIT) {
			int exitstatus;

			/*
			 * NOTE(review): both results are discarded; if either
			 * call can fail here, pid/exitstatus would be used
			 * uninitialized below -- confirm.
			 */
			(void) ct_pr_event_get_pid(ev, &pid);
			(void) ct_pr_event_get_exitstatus(ev,
			    &exitstatus);

			/*
			 * st_configd_pid is the process fork_configd() forked
			 * and already waited for, so its own exit event needs
			 * no action here.
			 */
			if (st->st_configd_pid != pid) {
				/*
				 * This is the child exiting, so we
				 * abandon the contract and restart
				 * configd.
				 */
				contract_abandon(ctid);
				fork_configd(exitstatus);
			}
		}

		/* Acknowledge the event on the contract's ctl file. */
		efd = contract_open(ctid, "process", "ctl", O_WRONLY);
		if (efd != -1) {
			(void) ct_ctl_ack(efd, evid);
			startd_close(efd);
		}

		ct_event_free(ev);

	}

	/*NOTREACHED*/
	return (NULL);
}
600
601void
602fork_rc_script(char rl, const char *arg, boolean_t wait)
603{
604	pid_t pid;
605	int tmpl, err, stat;
606	char path[20] = "/sbin/rc.", log[20] = "rc..log", timebuf[20];
607	time_t now;
608	struct tm ltime;
609	size_t sz;
610	char *pathenv;
611	char **nenv;
612
613	path[8] = rl;
614
615	tmpl = open64(CTFS_ROOT "/process/template", O_RDWR);
616	if (tmpl >= 0) {
617		err = ct_tmpl_set_critical(tmpl, 0);
618		assert(err == 0);
619
620		err = ct_tmpl_set_informative(tmpl, 0);
621		assert(err == 0);
622
623		err = ct_pr_tmpl_set_fatal(tmpl, 0);
624		assert(err == 0);
625
626		err = ct_tmpl_activate(tmpl);
627		assert(err == 0);
628
629		err = close(tmpl);
630		assert(err == 0);
631	} else {
632		uu_warn("Could not create contract template for %s.\n", path);
633	}
634
635	pid = startd_fork1(NULL);
636	if (pid < 0) {
637		return;
638	} else if (pid != 0) {
639		/* parent */
640		if (wait) {
641			do
642				err = waitpid(pid, &stat, 0);
643			while (err != 0 && errno == EINTR)
644				;
645
646			if (!WIFEXITED(stat)) {
647				log_framework(LOG_INFO,
648				    "%s terminated with waitpid() status %d.\n",
649				    path, stat);
650			} else if (WEXITSTATUS(stat) != 0) {
651				log_framework(LOG_INFO,
652				    "%s failed with status %d.\n", path,
653				    WEXITSTATUS(stat));
654			}
655		}
656
657		return;
658	}
659
660	/* child */
661
662	log[2] = rl;
663
664	setlog(log);
665
666	now = time(NULL);
667	sz = strftime(timebuf, sizeof (timebuf), "%b %e %T",
668	    localtime_r(&now, &ltime));
669	assert(sz != 0);
670
671	(void) fprintf(stderr, "%s Executing %s %s\n", timebuf, path, arg);
672
673	if (rl == 'S')
674		pathenv = "PATH=/sbin:/usr/sbin:/usr/bin";
675	else
676		pathenv = "PATH=/usr/sbin:/usr/bin";
677
678	nenv = set_smf_env(NULL, 0, pathenv, NULL, NULL);
679
680	(void) execle(path, path, arg, 0, nenv);
681
682	perror("exec");
683	exit(0);
684}
685
686#define	SVCCFG_PATH	"/usr/sbin/svccfg"
687#define	EMI_MFST	"/lib/svc/manifest/system/early-manifest-import.xml"
688#define	EMI_PATH	"/lib/svc/method/manifest-import"
689
690/*
691 * Set Early Manifest Import service's state and log file.
692 */
693static int
694emi_set_state(restarter_instance_state_t state, boolean_t setlog)
695{
696	int r, ret = 1;
697	instance_data_t idata;
698	scf_handle_t *hndl = NULL;
699	scf_instance_t *inst = NULL;
700
701retry:
702	if (hndl == NULL)
703		hndl = libscf_handle_create_bound(SCF_VERSION);
704
705	if (hndl == NULL) {
706		/*
707		 * In the case that we can't bind to the repository
708		 * (which should have been started), we need to allow
709		 * the user into maintenance mode to determine what's
710		 * failed.
711		 */
712		fork_sulogin(B_FALSE, "Unable to bind a new repository"
713		    " handle: %s\n", scf_strerror(scf_error()));
714		goto retry;
715	}
716
717	if (inst == NULL)
718		inst = safe_scf_instance_create(hndl);
719
720	if (scf_handle_decode_fmri(hndl, SCF_INSTANCE_EMI, NULL, NULL,
721	    inst, NULL, NULL, SCF_DECODE_FMRI_EXACT) == -1) {
722		switch (scf_error()) {
723		case SCF_ERROR_NOT_FOUND:
724			goto out;
725
726		case SCF_ERROR_CONNECTION_BROKEN:
727		case SCF_ERROR_NOT_BOUND:
728			libscf_handle_rebind(hndl);
729			goto retry;
730
731		default:
732			fork_sulogin(B_FALSE, "Couldn't fetch %s service: "
733			    "%s\n", SCF_INSTANCE_EMI,
734			    scf_strerror(scf_error()));
735			goto retry;
736		}
737	}
738
739	if (setlog) {
740		(void) libscf_note_method_log(inst, st->st_log_prefix, EMI_LOG);
741		log_framework(LOG_DEBUG,
742		    "Set logfile property for %s\n", SCF_INSTANCE_EMI);
743	}
744
745	idata.i_fmri = SCF_INSTANCE_EMI;
746	idata.i_state =  RESTARTER_STATE_NONE;
747	idata.i_next_state = RESTARTER_STATE_NONE;
748	switch (r = _restarter_commit_states(hndl, &idata, state,
749	    RESTARTER_STATE_NONE, NULL)) {
750	case 0:
751		break;
752
753	case ECONNABORTED:
754		libscf_handle_rebind(hndl);
755		goto retry;
756
757	case ENOMEM:
758	case ENOENT:
759	case EPERM:
760	case EACCES:
761	case EROFS:
762		fork_sulogin(B_FALSE, "Could not set state of "
763		    "%s: %s\n", SCF_INSTANCE_EMI, strerror(r));
764		goto retry;
765		break;
766
767	case EINVAL:
768	default:
769		bad_error("_restarter_commit_states", r);
770	}
771	ret = 0;
772
773out:
774	scf_instance_destroy(inst);
775	scf_handle_destroy(hndl);
776	return (ret);
777}
778
779/*
780 * It is possible that the early-manifest-import service is disabled.  This
781 * would not be the normal case for Solaris, but it may happen on dedicated
782 * systems.  So this function checks the state of the general/enabled
783 * property for Early Manifest Import.
784 *
785 * It is also possible that the early-manifest-import service does not yet
786 * have a repository representation when this function runs.  This happens
787 * if non-Early Manifest Import system is upgraded to an Early Manifest
788 * Import based system.  Thus, the non-existence of general/enabled is not
789 * an error.
790 *
791 * Returns 1 if Early Manifest Import is disabled and 0 otherwise.
792 */
793static int
794emi_is_disabled()
795{
796	int disabled = 0;
797	int disconnected = 1;
798	int enabled;
799	scf_handle_t *hndl = NULL;
800	scf_instance_t *inst = NULL;
801	uchar_t stored_hash[MHASH_SIZE];
802	char *pname;
803	int hashash, r;
804
805	while (hndl == NULL) {
806		hndl = libscf_handle_create_bound(SCF_VERSION);
807
808		if (hndl == NULL) {
809			/*
810			 * In the case that we can't bind to the repository
811			 * (which should have been started), we need to
812			 * allow the user into maintenance mode to
813			 * determine what's failed.
814			 */
815			fork_sulogin(B_FALSE, "Unable to bind a new repository "
816			    "handle: %s\n", scf_strerror(scf_error()));
817		}
818	}
819
820	while (disconnected) {
821		r = libscf_fmri_get_instance(hndl, SCF_INSTANCE_EMI, &inst);
822		if (r != 0) {
823			switch (r) {
824			case ECONNABORTED:
825				libscf_handle_rebind(hndl);
826				continue;
827
828			case ENOENT:
829				/*
830				 * Early Manifest Import service is not in
831				 * the repository. Check the manifest file
832				 * and service's hash in smf/manifest to
833				 * figure out whether Early Manifest Import
834				 * service was deleted. If Early Manifest Import
835				 * service was deleted, treat that as a disable
836				 * and don't run early import.
837				 */
838
839				if (access(EMI_MFST, F_OK)) {
840					/*
841					 * Manifest isn't found, so service is
842					 * properly removed.
843					 */
844					disabled = 1;
845				} else {
846					/*
847					 * If manifest exists and we have the
848					 * hash, the service was improperly
849					 * deleted, generate a warning and treat
850					 * this as a disable.
851					 */
852
853					if ((pname = mhash_filename_to_propname(
854					    EMI_MFST, B_TRUE)) == NULL) {
855						/*
856						 * Treat failure to get propname
857						 * as a disable.
858						 */
859						disabled = 1;
860						uu_warn("Failed to get propname"
861						    " for %s.\n",
862						    SCF_INSTANCE_EMI);
863					} else {
864						hashash = mhash_retrieve_entry(
865						    hndl, pname,
866						    stored_hash,
867						    NULL) == 0;
868						uu_free(pname);
869
870						if (hashash) {
871							disabled = 1;
872							uu_warn("%s service is "
873							    "deleted \n",
874							    SCF_INSTANCE_EMI);
875						}
876					}
877
878				}
879
880				disconnected = 0;
881				continue;
882
883			default:
884				bad_error("libscf_fmri_get_instance",
885				    scf_error());
886			}
887		}
888		r = libscf_get_basic_instance_data(hndl, inst, SCF_INSTANCE_EMI,
889		    &enabled, NULL, NULL);
890		if (r == 0) {
891			/*
892			 * enabled can be returned as -1, which indicates
893			 * that the enabled property was not found.  To us
894			 * that means that the service was not disabled.
895			 */
896			if (enabled == 0)
897				disabled = 1;
898		} else {
899			switch (r) {
900			case ECONNABORTED:
901				libscf_handle_rebind(hndl);
902				continue;
903
904			case ECANCELED:
905			case ENOENT:
906				break;
907			default:
908				bad_error("libscf_get_basic_instance_data", r);
909			}
910		}
911		disconnected = 0;
912	}
913
914out:
915	if (inst != NULL)
916		scf_instance_destroy(inst);
917	scf_handle_destroy(hndl);
918	return (disabled);
919}
920
921void
922fork_emi()
923{
924	pid_t pid;
925	ctid_t ctid = -1;
926	char **envp, **np;
927	char *emipath;
928	char corepath[PATH_MAX];
929	char *svc_state;
930	int setemilog;
931	int sz;
932
933	if (emi_is_disabled()) {
934		log_framework(LOG_NOTICE, "%s is  disabled and will "
935		    "not be run.\n", SCF_INSTANCE_EMI);
936		return;
937	}
938
939	/*
940	 * Early Manifest Import should run only once, at boot. If svc.startd
941	 * is some how restarted, Early Manifest Import  should not run again.
942	 * Use the Early Manifest Import service's state to figure out whether
943	 * Early Manifest Import has successfully completed earlier and bail
944	 * out if it did.
945	 */
946	if (svc_state = smf_get_state(SCF_INSTANCE_EMI)) {
947		if (strcmp(svc_state, SCF_STATE_STRING_ONLINE) == 0) {
948			free(svc_state);
949			return;
950		}
951		free(svc_state);
952	}
953
954	/*
955	 * Attempt to set Early Manifest Import service's state and log file.
956	 * If emi_set_state fails, set log file again in the next call to
957	 * emi_set_state.
958	 */
959	setemilog = emi_set_state(RESTARTER_STATE_OFFLINE, B_TRUE);
960
961	/* Don't go further if /usr isn't available */
962	if (access(SVCCFG_PATH, F_OK)) {
963		log_framework(LOG_NOTICE, "Early Manifest Import is not "
964		    "supported on systems with a separate /usr filesystem.\n");
965		return;
966	}
967
968fork_retry:
969	log_framework(LOG_DEBUG, "Starting Early Manifest Import\n");
970
971	/*
972	 * If we're retrying, we will have an old contract lying around
973	 * from the failure.  Since we're going to be creating a new
974	 * contract shortly, we abandon the old one now.
975	 */
976	if (ctid != -1)
977		contract_abandon(ctid);
978	ctid = -1;
979
980	pid = fork_common(SCF_INSTANCE_EMI, SCF_INSTANCE_EMI,
981	    MAX_EMI_RETRIES, &ctid, 0, 0, 0, 0, EMI_COOKIE);
982
983	if (pid != 0) {
984		int exitstatus;
985
986		if (waitpid(pid, &exitstatus, 0) == -1) {
987			fork_sulogin(B_FALSE, "waitpid on %s failed: "
988			    "%s\n", SCF_INSTANCE_EMI, strerror(errno));
989		} else if (WIFEXITED(exitstatus)) {
990			if (WEXITSTATUS(exitstatus)) {
991				fork_sulogin(B_FALSE, "%s exited with status "
992				    "%d \n", SCF_INSTANCE_EMI,
993				    WEXITSTATUS(exitstatus));
994				goto fork_retry;
995			}
996		} else if (WIFSIGNALED(exitstatus)) {
997			char signame[SIG2STR_MAX];
998
999			if (sig2str(WTERMSIG(exitstatus), signame))
1000				(void) snprintf(signame, SIG2STR_MAX,
1001				    "signum %d", WTERMSIG(exitstatus));
1002
1003			fork_sulogin(B_FALSE, "%s signalled: %s\n",
1004			    SCF_INSTANCE_EMI, signame);
1005			goto fork_retry;
1006		} else {
1007			fork_sulogin(B_FALSE, "%s non-exit condition: 0x%x\n",
1008			    SCF_INSTANCE_EMI, exitstatus);
1009			goto fork_retry;
1010		}
1011
1012		log_framework(LOG_DEBUG, "%s completed successfully\n",
1013		    SCF_INSTANCE_EMI);
1014
1015		/*
1016		 * Once Early Manifest Import completed, the Early Manifest
1017		 * Import service must have been imported so set log file and
1018		 * state properties. Since this information is required for
1019		 * late manifest import and common admin operations, failing to
1020		 * set these properties should result in su login so admin can
1021		 * correct the problem.
1022		 */
1023		(void) emi_set_state(RESTARTER_STATE_ONLINE,
1024		    setemilog ? B_TRUE : B_FALSE);
1025
1026		return;
1027	}
1028
1029	/* child */
1030
1031	/*
1032	 * Set our per-process core file path to leave core files in
1033	 * /etc/svc/volatile directory, named after the PID to aid in debugging.
1034	 */
1035	(void) snprintf(corepath, sizeof (corepath),
1036	    "/etc/svc/volatile/core.emi.%%p");
1037	(void) core_set_process_path(corepath, strlen(corepath) + 1, getpid());
1038
1039	/*
1040	 * Similar to running legacy services, we need to manually set
1041	 * log files here and environment variables.
1042	 */
1043	setlog(EMI_LOG);
1044
1045	envp = startd_zalloc(sizeof (char *) * 3);
1046	np = envp;
1047
1048	sz = sizeof ("SMF_FMRI=") + strlen(SCF_INSTANCE_EMI);
1049	*np = startd_zalloc(sz);
1050	(void) strlcpy(*np, "SMF_FMRI=", sz);
1051	(void) strncat(*np, SCF_INSTANCE_EMI, sz);
1052	np++;
1053
1054	emipath = getenv("PATH");
1055	if (emipath == NULL)
1056		emipath = strdup("/usr/sbin:/usr/bin");
1057
1058	sz = sizeof ("PATH=") + strlen(emipath);
1059	*np = startd_zalloc(sz);
1060	(void) strlcpy(*np, "PATH=", sz);
1061	(void) strncat(*np, emipath, sz);
1062
1063	log_framework(LOG_DEBUG, "executing Early Manifest Import\n");
1064	(void) execle(EMI_PATH, EMI_PATH, NULL, envp);
1065
1066	/*
1067	 * Status code is used above to identify Early Manifest Import
1068	 * exec failure.
1069	 */
1070	exit(1);
1071}
1072
1073extern char **environ;
1074
1075/*
1076 * A local variation on system(3c) which accepts a timeout argument.  This
1077 * allows us to better ensure that the system will actually shut down.
1078 *
1079 * gracetime specifies an amount of time in seconds which the routine must wait
1080 * after the command exits, to allow for asynchronous effects (like sent
1081 * signals) to take effect.  This can be zero.
1082 */
1083void
1084fork_with_timeout(const char *cmd, uint_t gracetime, uint_t timeout)
1085{
1086	int err = 0;
1087	pid_t pid;
1088	char *argv[4];
1089	posix_spawnattr_t attr;
1090	posix_spawn_file_actions_t factions;
1091
1092	sigset_t mask, savemask;
1093	uint_t msec_timeout;
1094	uint_t msec_spent = 0;
1095	uint_t msec_gracetime;
1096	int status;
1097
1098	msec_timeout = timeout * 1000;
1099	msec_gracetime = gracetime * 1000;
1100
1101	/*
1102	 * See also system(3c) in libc.  This is very similar, except
1103	 * that we avoid some unneeded complexity.
1104	 */
1105	err = posix_spawnattr_init(&attr);
1106	if (err == 0)
1107		err = posix_spawnattr_setflags(&attr,
1108		    POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF |
1109		    POSIX_SPAWN_NOSIGCHLD_NP | POSIX_SPAWN_WAITPID_NP |
1110		    POSIX_SPAWN_NOEXECERR_NP);
1111
1112	/*
1113	 * We choose to close fd's above 2, a deviation from system.
1114	 */
1115	if (err == 0)
1116		err = posix_spawn_file_actions_init(&factions);
1117	if (err == 0)
1118		err = posix_spawn_file_actions_addclosefrom_np(&factions,
1119		    STDERR_FILENO + 1);
1120
1121	(void) sigemptyset(&mask);
1122	(void) sigaddset(&mask, SIGCHLD);
1123	(void) thr_sigsetmask(SIG_BLOCK, &mask, &savemask);
1124
1125	argv[0] = "/bin/sh";
1126	argv[1] = "-c";
1127	argv[2] = (char *)cmd;
1128	argv[3] = NULL;
1129
1130	if (err == 0)
1131		err = posix_spawn(&pid, "/bin/sh", &factions, &attr,
1132		    (char *const *)argv, (char *const *)environ);
1133
1134	(void) posix_spawnattr_destroy(&attr);
1135	(void) posix_spawn_file_actions_destroy(&factions);
1136
1137	if (err) {
1138		uu_warn("Failed to spawn %s: %s\n", cmd, strerror(err));
1139	} else {
1140		for (;;) {
1141			int w;
1142			w = waitpid(pid, &status, WNOHANG);
1143			if (w == -1 && errno != EINTR)
1144				break;
1145			if (w > 0) {
1146				/*
1147				 * Command succeeded, so give it gracetime
1148				 * seconds for it to have an effect.
1149				 */
1150				if (status == 0 && msec_gracetime != 0)
1151					(void) poll(NULL, 0, msec_gracetime);
1152				break;
1153			}
1154
1155			(void) poll(NULL, 0, 100);
1156			msec_spent += 100;
1157			/*
1158			 * If we timed out, kill off the process, then try to
1159			 * wait for it-- it's possible that we could accumulate
1160			 * a zombie here since we don't allow waitpid to hang,
1161			 * but it's better to let that happen and continue to
1162			 * make progress.
1163			 */
1164			if (msec_spent >= msec_timeout) {
1165				uu_warn("'%s' timed out after %d "
1166				    "seconds.  Killing.\n", cmd,
1167				    timeout);
1168				(void) kill(pid, SIGTERM);
1169				(void) poll(NULL, 0, 100);
1170				(void) kill(pid, SIGKILL);
1171				(void) poll(NULL, 0, 100);
1172				(void) waitpid(pid, &status, WNOHANG);
1173				break;
1174			}
1175		}
1176	}
1177	(void) thr_sigsetmask(SIG_BLOCK, &savemask, NULL);
1178}
1179