restarter.c revision 8823:000507e9108d
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * restarter.c - service manipulation
28 *
29 * This component manages services whose restarter is svc.startd, the standard
30 * restarter.  It translates restarter protocol events from the graph engine
31 * into actions on processes, as a delegated restarter would do.
32 *
33 * The master restarter manages a number of always-running threads:
34 *   - restarter event thread: events from the graph engine
35 *   - timeout thread: thread to fire queued timeouts
36 *   - contract thread: thread to handle contract events
37 *   - wait thread: thread to handle wait-based services
38 *
39 * The other threads are created as-needed:
40 *   - per-instance method threads
41 *   - per-instance event processing threads
42 *
43 * The interaction of all threads must result in the following conditions
44 * being satisfied (on a per-instance basis):
45 *   - restarter events must be processed in order
46 *   - method execution must be serialized
47 *   - instance delete must be held until outstanding methods are complete
48 *   - contract events shouldn't be processed while a method is running
49 *   - timeouts should fire even when a method is running
50 *
51 * Service instances are represented by restarter_inst_t's and are kept in the
52 * instance_list list.
53 *
54 * Service States
55 *   The current state of a service instance is kept in
56 *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
57 *   some time, then before we effect the transition we set
58 *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
59 *   rotate i_next_state to i_state and set i_next_state to
60 *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
61 *   held.  The exception is when we launch methods, which are done with
62 *   a separate thread.  To keep any other threads from grabbing ri_lock before
63 *   method_thread() does, we set ri_method_thread to the thread id of the
64 *   method thread, and when it is nonzero any thread with a different thread id
65 *   waits on ri_method_cv.
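 *   For example, to stop an online instance we first set i_next_state to
 *   offline (or disabled), then run the stop method in a method thread; when
 *   the method finishes, offline is rotated into i_state and i_next_state
 *   returns to _NONE.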
66 *
67 * Method execution is serialized by blocking on ri_method_cv in
68 * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
69 * also prevents the instance structure from being deleted until all
70 * outstanding operations such as method_thread() have finished.
71 *
72 * Lock ordering:
73 *
74 * dgraph_lock [can be held when taking:]
75 *   utmpx_lock
76 *   dictionary->dict_lock
77 *   st->st_load_lock
78 *   wait_info_lock
79 *   ru->restarter_update_lock
80 *     restarter_queue->rpeq_lock
81 *   instance_list.ril_lock
82 *     inst->ri_lock
83 *   st->st_configd_live_lock
84 *
85 * instance_list.ril_lock
86 *   graph_queue->gpeq_lock
87 *   gu->gu_lock
88 *   st->st_configd_live_lock
89 *   dictionary->dict_lock
90 *   inst->ri_lock
91 *     graph_queue->gpeq_lock
92 *     gu->gu_lock
93 *     tu->tu_lock
94 *     tq->tq_lock
95 *     inst->ri_queue_lock
96 *       wait_info_lock
97 *       bp->cb_lock
98 *     utmpx_lock
99 *
100 * single_user_thread_lock
101 *   wait_info_lock
102 *   utmpx_lock
103 *
104 * gu_freeze_lock
105 *
106 * logbuf_mutex nests inside pretty much everything.
107 */
108
109#include <sys/contract/process.h>
110#include <sys/ctfs.h>
111#include <sys/stat.h>
112#include <sys/time.h>
113#include <sys/types.h>
114#include <sys/uio.h>
115#include <sys/wait.h>
116#include <assert.h>
117#include <errno.h>
118#include <fcntl.h>
119#include <libcontract.h>
120#include <libcontract_priv.h>
121#include <libintl.h>
122#include <librestart.h>
123#include <librestart_priv.h>
124#include <libuutil.h>
125#include <limits.h>
126#include <poll.h>
127#include <port.h>
128#include <pthread.h>
129#include <stdarg.h>
130#include <stdio.h>
131#include <strings.h>
132#include <unistd.h>
133
134#include "startd.h"
135#include "protocol.h"
136
137static uu_list_pool_t *restarter_instance_pool;
138static restarter_instance_list_t instance_list;
139
140static uu_list_pool_t *restarter_queue_pool;
141
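/*
 * uu_list comparator for instance_list: orders restarter_inst_t entries by
 * ri_id.  The right-hand argument is a bare integer id, as passed to
 * uu_list_find().
 */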
142/*ARGSUSED*/
143static int
144restarter_instance_compare(const void *lc_arg, const void *rc_arg,
145    void *private)
146{
147	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
148	int rc_id = *(int *)rc_arg;
149
150	if (lc_id > rc_id)
151		return (1);
152	if (lc_id < rc_id)
153		return (-1);
154	return (0);
155}
156
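/*
 * Look up an instance by FMRI.  Returns the instance with ri_lock held (via
 * inst_lookup_by_id()), or NULL if the name is unknown.
 */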
157static restarter_inst_t *
158inst_lookup_by_name(const char *name)
159{
160	int id;
161
162	id = dict_lookup_byname(name);
163	if (id == -1)
164		return (NULL);
165
166	return (inst_lookup_by_id(id));
167}
168
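/*
 * Look up an instance by id and return it with ri_lock held, first waiting
 * for any method thread running for that instance to finish.  Returns NULL
 * if the id is not in instance_list.
 */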
169restarter_inst_t *
170inst_lookup_by_id(int id)
171{
172	restarter_inst_t *inst;
173
174	MUTEX_LOCK(&instance_list.ril_lock);
175	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
176	if (inst != NULL)
177		MUTEX_LOCK(&inst->ri_lock);
178	MUTEX_UNLOCK(&instance_list.ril_lock);
179
180	if (inst != NULL) {
181		while (inst->ri_method_thread != 0 &&
182		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
183			++inst->ri_method_waiters;
184			(void) pthread_cond_wait(&inst->ri_method_cv,
185			    &inst->ri_lock);
186			assert(inst->ri_method_waiters > 0);
187			--inst->ri_method_waiters;
188		}
189	}
190
191	return (inst);
192}
193
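/*
 * Like inst_lookup_by_name(), but returns the instance with ri_queue_lock
 * held rather than ri_lock, so callers can queue events without waiting for
 * a running method to finish.
 */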
194static restarter_inst_t *
195inst_lookup_queue(const char *name)
196{
197	int id;
198	restarter_inst_t *inst;
199
200	id = dict_lookup_byname(name);
201	if (id == -1)
202		return (NULL);
203
204	MUTEX_LOCK(&instance_list.ril_lock);
205	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
206	if (inst != NULL)
207		MUTEX_LOCK(&inst->ri_queue_lock);
208	MUTEX_UNLOCK(&instance_list.ril_lock);
209
210	return (inst);
211}
212
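/*
 * Map an instance's RINST_STYLE_* flag to a human-readable name for logging.
 */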
213const char *
214service_style(int flags)
215{
216	switch (flags & RINST_STYLE_MASK) {
217	case RINST_CONTRACT:	return ("contract");
218	case RINST_TRANSIENT:	return ("transient");
219	case RINST_WAIT:	return ("wait");
220
221	default:
222#ifndef NDEBUG
223		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
224#endif
225		abort();
226		/* NOTREACHED */
227	}
228}
229
230/*
231 * Fails with ECONNABORTED or ECANCELED.
232 */
233static int
234check_contract(restarter_inst_t *inst, boolean_t primary,
235    scf_instance_t *scf_inst)
236{
237	ctid_t *ctidp;
238	int fd, r;
239
240	ctidp = primary ? &inst->ri_i.i_primary_ctid :
241	    &inst->ri_i.i_transient_ctid;
242
243	assert(*ctidp >= 1);
244
245	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
246	if (fd >= 0) {
247		r = close(fd);
248		assert(r == 0);
249		return (0);
250	}
251
252	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
253	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
254	switch (r) {
255	case 0:
256	case ECONNABORTED:
257	case ECANCELED:
258		*ctidp = 0;
259		return (r);
260
261	case ENOMEM:
262		uu_die("Out of memory\n");
263		/* NOTREACHED */
264
265	case EPERM:
266		uu_die("Insufficient privilege.\n");
267		/* NOTREACHED */
268
269	case EACCES:
270		uu_die("Repository backend access denied.\n");
271		/* NOTREACHED */
272
273	case EROFS:
274		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
275		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
276		return (0);
277
278	case EINVAL:
279	case EBADF:
280	default:
281		assert(0);
282		abort();
283		/* NOTREACHED */
284	}
285}
286
287static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
288
289/*
290 * int restarter_insert_inst(scf_handle_t *, char *)
291 *   If the inst is already in the restarter list, return 0.  If the inst
292 *   is not in the restarter list, initialize a restarter_inst_t, initialize its
293 *   states, insert it into the list, and return 0.
294 *
295 *   Fails with
296 *     ENOENT - name is not in the repository
297 */
298static int
299restarter_insert_inst(scf_handle_t *h, const char *name)
300{
301	int id, r;
302	restarter_inst_t *inst;
303	uu_list_index_t idx;
304	scf_service_t *scf_svc;
305	scf_instance_t *scf_inst;
306	scf_snapshot_t *snap = NULL;
307	scf_propertygroup_t *pg;
308	char *svc_name, *inst_name;
309	char logfilebuf[PATH_MAX];
310	char *c;
311	boolean_t do_commit_states;
312	restarter_instance_state_t state, next_state;
313	protocol_states_t *ps;
314	pid_t start_pid;
315
316	MUTEX_LOCK(&instance_list.ril_lock);
317
318	/*
319	 * We don't use inst_lookup_by_name() here because we want the lookup
320	 * & insert to be atomic.
321	 */
322	id = dict_lookup_byname(name);
323	if (id != -1) {
324		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
325		    &idx);
326		if (inst != NULL) {
327			MUTEX_UNLOCK(&instance_list.ril_lock);
328			return (0);
329		}
330	}
331
332	/* Allocate an instance */
333	inst = startd_zalloc(sizeof (restarter_inst_t));
334	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
335	inst->ri_utmpx_prefix[0] = '\0';
336
337	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
338	(void) strcpy((char *)inst->ri_i.i_fmri, name);
339
340	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
341
342	/*
343	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
344	 * just in case.
345	 */
346	inst->ri_id = (id != -1 ? id : dict_insert(name));
347
348	special_online_hooks_get(name, &inst->ri_pre_online_hook,
349	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
350
351	scf_svc = safe_scf_service_create(h);
352	scf_inst = safe_scf_instance_create(h);
353	pg = safe_scf_pg_create(h);
354	svc_name = startd_alloc(max_scf_name_size);
355	inst_name = startd_alloc(max_scf_name_size);
356
357rep_retry:
358	if (snap != NULL)
359		scf_snapshot_destroy(snap);
360	if (inst->ri_logstem != NULL)
361		startd_free(inst->ri_logstem, PATH_MAX);
362	if (inst->ri_common_name != NULL)
363		startd_free(inst->ri_common_name, max_scf_value_size);
364	if (inst->ri_C_common_name != NULL)
365		startd_free(inst->ri_C_common_name, max_scf_value_size);
366	snap = NULL;
367	inst->ri_logstem = NULL;
368	inst->ri_common_name = NULL;
369	inst->ri_C_common_name = NULL;
370
371	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
372	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
373		switch (scf_error()) {
374		case SCF_ERROR_CONNECTION_BROKEN:
375			libscf_handle_rebind(h);
376			goto rep_retry;
377
378		case SCF_ERROR_NOT_FOUND:
379			goto deleted;
380		}
381
382		uu_die("Can't decode FMRI %s: %s\n", name,
383		    scf_strerror(scf_error()));
384	}
385
386	/*
387	 * If there's no running snapshot, then we execute using the editing
388	 * snapshot.  Pending snapshots will be taken later.
389	 */
390	snap = libscf_get_running_snapshot(scf_inst);
391
392	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
393	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
394	    0)) {
395		switch (scf_error()) {
396		case SCF_ERROR_NOT_SET:
397			break;
398
399		case SCF_ERROR_CONNECTION_BROKEN:
400			libscf_handle_rebind(h);
401			goto rep_retry;
402
403		default:
404			assert(0);
405			abort();
406		}
407
408		goto deleted;
409	}
410
411	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
412	for (c = logfilebuf; *c != '\0'; c++)
413		if (*c == '/')
414			*c = '-';
415
416	inst->ri_logstem = startd_alloc(PATH_MAX);
417	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
418	    LOG_SUFFIX);
419
420	/*
421	 * If the restarter group is missing, use uninit/none.  Otherwise,
422	 * we're probably being restarted & don't want to mess up the states
423	 * that are there.
424	 */
425	state = RESTARTER_STATE_UNINIT;
426	next_state = RESTARTER_STATE_NONE;
427
428	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
429	if (r != 0) {
430		switch (scf_error()) {
431		case SCF_ERROR_CONNECTION_BROKEN:
432			libscf_handle_rebind(h);
433			goto rep_retry;
434
435		case SCF_ERROR_NOT_SET:
436			goto deleted;
437
438		case SCF_ERROR_NOT_FOUND:
439			/*
440			 * This shouldn't happen since the graph engine should
441			 * have initialized the state to uninitialized/none if
442			 * there was no restarter pg.  In case somebody
443			 * deleted it, though....
444			 */
445			do_commit_states = B_TRUE;
446			break;
447
448		default:
449			assert(0);
450			abort();
451		}
452	} else {
453		r = libscf_read_states(pg, &state, &next_state);
454		if (r != 0) {
455			do_commit_states = B_TRUE;
456		} else {
457			if (next_state != RESTARTER_STATE_NONE) {
458				/*
459				 * Force next_state to _NONE since we
460				 * don't look for method processes.
461				 */
462				next_state = RESTARTER_STATE_NONE;
463				do_commit_states = B_TRUE;
464			} else {
465				/*
466				 * Inform the restarter of our state without
467				 * Inform the graph engine of our state without
468				 */
469				ps = startd_alloc(sizeof (*ps));
470				inst->ri_i.i_state = ps->ps_state = state;
471				inst->ri_i.i_next_state = ps->ps_state_next =
472				    next_state;
473
474				graph_protocol_send_event(inst->ri_i.i_fmri,
475				    GRAPH_UPDATE_STATE_CHANGE, ps);
476
477				do_commit_states = B_FALSE;
478			}
479		}
480	}
481
482	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
483	    &inst->ri_utmpx_prefix)) {
484	case 0:
485		break;
486
487	case ECONNABORTED:
488		libscf_handle_rebind(h);
489		goto rep_retry;
490
491	case ECANCELED:
492		goto deleted;
493
494	case ENOENT:
495		/*
496		 * This is odd, because the graph engine should have required
497		 * the general property group.  So we'll just use default
498		 * flags in anticipation of the graph engine sending us
499		 * REMOVE_INSTANCE when it finds out that the general property
500		 * group has been deleted.
501		 */
502		inst->ri_flags = RINST_CONTRACT;
503		break;
504
505	default:
506		assert(0);
507		abort();
508	}
509
510	switch (libscf_get_template_values(scf_inst, snap,
511	    &inst->ri_common_name, &inst->ri_C_common_name)) {
512	case 0:
513		break;
514
515	case ECONNABORTED:
516		libscf_handle_rebind(h);
517		goto rep_retry;
518
519	case ECANCELED:
520		goto deleted;
521
522	case ECHILD:
523	case ENOENT:
524		break;
525
526	default:
527		assert(0);
528		abort();
529	}
530
531	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
532	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
533	    &start_pid)) {
534	case 0:
535		break;
536
537	case ECONNABORTED:
538		libscf_handle_rebind(h);
539		goto rep_retry;
540
541	case ECANCELED:
542		goto deleted;
543
544	default:
545		assert(0);
546		abort();
547	}
548
549	if (inst->ri_i.i_primary_ctid >= 1) {
550		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
551
552		switch (check_contract(inst, B_TRUE, scf_inst)) {
553		case 0:
554			break;
555
556		case ECONNABORTED:
557			libscf_handle_rebind(h);
558			goto rep_retry;
559
560		case ECANCELED:
561			goto deleted;
562
563		default:
564			assert(0);
565			abort();
566		}
567	}
568
569	if (inst->ri_i.i_transient_ctid >= 1) {
570		switch (check_contract(inst, B_FALSE, scf_inst)) {
571		case 0:
572			break;
573
574		case ECONNABORTED:
575			libscf_handle_rebind(h);
576			goto rep_retry;
577
578		case ECANCELED:
579			goto deleted;
580
581		default:
582			assert(0);
583			abort();
584		}
585	}
586
587	/* No more recoverable failures from this point on, so add it to the list. */
588	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
589	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
590	MUTEX_LOCK(&inst->ri_lock);
591	MUTEX_LOCK(&inst->ri_queue_lock);
592
593	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
594
595	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
596	uu_list_insert(instance_list.ril_instance_list, inst, idx);
597	MUTEX_UNLOCK(&instance_list.ril_lock);
598
599	if (start_pid != -1 &&
600	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
601		int ret;
602		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
603		if (ret == -1) {
604			/*
605			 * Implication:  if we can't reregister the
606			 * instance, we will start another one.  Two
607			 * instances may or may not result in a resource
608			 * conflict.
609			 */
610			log_error(LOG_WARNING,
611			    "%s: couldn't reregister %ld for wait\n",
612			    inst->ri_i.i_fmri, start_pid);
613		} else if (ret == 1) {
614			/*
615			 * Leading PID has exited.
616			 */
617			(void) stop_instance(h, inst, RSTOP_EXIT);
618		}
619	}
620
621
622	scf_pg_destroy(pg);
623
624	if (do_commit_states)
625		(void) restarter_instance_update_states(h, inst, state,
626		    next_state, RERR_NONE, NULL);
627
628	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
629	    service_style(inst->ri_flags));
630
631	MUTEX_UNLOCK(&inst->ri_queue_lock);
632	MUTEX_UNLOCK(&inst->ri_lock);
633
634	startd_free(svc_name, max_scf_name_size);
635	startd_free(inst_name, max_scf_name_size);
636	scf_snapshot_destroy(snap);
637	scf_instance_destroy(scf_inst);
638	scf_service_destroy(scf_svc);
639
640	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
641	    name);
642
643	return (0);
644
645deleted:
646	MUTEX_UNLOCK(&instance_list.ril_lock);
647	startd_free(inst_name, max_scf_name_size);
648	startd_free(svc_name, max_scf_name_size);
649	if (snap != NULL)
650		scf_snapshot_destroy(snap);
651	scf_pg_destroy(pg);
652	scf_instance_destroy(scf_inst);
653	scf_service_destroy(scf_svc);
654	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
655	uu_list_destroy(inst->ri_queue);
656	if (inst->ri_logstem != NULL)
657		startd_free(inst->ri_logstem, PATH_MAX);
658	if (inst->ri_common_name != NULL)
659		startd_free(inst->ri_common_name, max_scf_value_size);
660	if (inst->ri_C_common_name != NULL)
661		startd_free(inst->ri_C_common_name, max_scf_value_size);
662	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
663	startd_free(inst, sizeof (restarter_inst_t));
664	return (ENOENT);
665}
666
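/*
 * Remove ri from instance_list, wait for any outstanding method activity to
 * finish, and free it.  Called with ri_lock held; the lock is dropped and
 * reacquired so instance_list.ril_lock can be taken first, per the lock
 * ordering above.
 */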
667static void
668restarter_delete_inst(restarter_inst_t *ri)
669{
670	int id;
671	restarter_inst_t *rip;
672	void *cookie = NULL;
673	restarter_instance_qentry_t *e;
674
675	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
676
677	/*
678	 * Must drop the instance lock so we can pick up the instance_list
679	 * lock & remove the instance.
680	 */
681	id = ri->ri_id;
682	MUTEX_UNLOCK(&ri->ri_lock);
683
684	MUTEX_LOCK(&instance_list.ril_lock);
685
686	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
687	if (rip == NULL) {
688		MUTEX_UNLOCK(&instance_list.ril_lock);
689		return;
690	}
691
692	assert(ri == rip);
693
694	uu_list_remove(instance_list.ril_instance_list, ri);
695
696	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
697	    ri->ri_i.i_fmri);
698
699	MUTEX_UNLOCK(&instance_list.ril_lock);
700
701	/*
702	 * We can lock the instance without holding the instance_list lock
703	 * since we removed the instance from the list.
704	 */
705	MUTEX_LOCK(&ri->ri_lock);
706	MUTEX_LOCK(&ri->ri_queue_lock);
707
708	if (ri->ri_i.i_primary_ctid >= 1)
709		contract_hash_remove(ri->ri_i.i_primary_ctid);
710
711	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
712		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
713
714	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
715		startd_free(e, sizeof (*e));
716	uu_list_destroy(ri->ri_queue);
717
718	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
719	startd_free(ri->ri_logstem, PATH_MAX);
720	if (ri->ri_common_name != NULL)
721		startd_free(ri->ri_common_name, max_scf_value_size);
722	if (ri->ri_C_common_name != NULL)
723		startd_free(ri->ri_C_common_name, max_scf_value_size);
724	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
725	(void) pthread_mutex_destroy(&ri->ri_lock);
726	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
727	startd_free(ri, sizeof (restarter_inst_t));
728}
729
730/*
731 * instance_is_wait_style()
732 *
733 *   Returns 1 if the given instance is a "wait-style" service instance.
734 */
735int
736instance_is_wait_style(restarter_inst_t *inst)
737{
738	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
739	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
740}
741
742/*
743 * instance_is_transient_style()
744 *
745 *   Returns 1 if the given instance is a transient service instance.
746 */
747int
748instance_is_transient_style(restarter_inst_t *inst)
749{
750	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
751	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
752}
753
754/*
755 * instance_in_transition()
756 * Returns 1 if instance is in transition, 0 if not
757 */
758int
759instance_in_transition(restarter_inst_t *inst)
760{
761	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
762	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
763		return (0);
764	return (1);
765}
766
767/*
768 * returns 1 if instance is already started, 0 if not
769 */
770static int
771instance_started(restarter_inst_t *inst)
772{
773	int ret;
774
775	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
776
777	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
778	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
779		ret = 1;
780	else
781		ret = 0;
782
783	return (ret);
784}
785
786/*
787 * Returns
788 *   0 - success
789 *   ECONNRESET - success, but h was rebound
790 */
791int
792restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
793    restarter_instance_state_t new_state,
794    restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
795{
796	protocol_states_t *states;
797	int e;
798	uint_t retry_count = 0, msecs = ALLOC_DELAY;
799	boolean_t rebound = B_FALSE;
800	int prev_state_online;
801	int state_online;
802
803	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));
804
805	prev_state_online = instance_started(ri);
806
807retry:
808	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
809	    aux);
810	switch (e) {
811	case 0:
812		break;
813
814	case ENOMEM:
815		++retry_count;
816		if (retry_count < ALLOC_RETRY) {
817			(void) poll(NULL, 0, msecs);
818			msecs *= ALLOC_DELAY_MULT;
819			goto retry;
820		}
821
822		/* Like startd_alloc(). */
823		uu_die("Insufficient memory.\n");
824		/* NOTREACHED */
825
826	case ECONNABORTED:
827		libscf_handle_rebind(h);
828		rebound = B_TRUE;
829		goto retry;
830
831	case EPERM:
832	case EACCES:
833	case EROFS:
834		log_error(LOG_NOTICE, "Could not commit state change for %s "
835		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
836		/* FALLTHROUGH */
837
838	case ENOENT:
839		ri->ri_i.i_state = new_state;
840		ri->ri_i.i_next_state = new_state_next;
841		break;
842
843	case EINVAL:
844	default:
845		bad_error("_restarter_commit_states", e);
846	}
847
848	states = startd_alloc(sizeof (protocol_states_t));
849	states->ps_state = new_state;
850	states->ps_state_next = new_state_next;
851	states->ps_err = err;
852	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
853	    (void *)states);
854
855	state_online = instance_started(ri);
856
857	if (prev_state_online && !state_online)
858		ri->ri_post_offline_hook();
859	else if (!prev_state_online && state_online)
860		ri->ri_post_online_hook();
861
862	return (rebound ? ECONNRESET : 0);
863}
864
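/*
 * Flag fmri's instance so that its running or start snapshot is retaken
 * later by restarter_take_pending_snapshots().
 */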
865void
866restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
867{
868	restarter_inst_t *inst;
869
870	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
871
872	inst = inst_lookup_by_name(fmri);
873	if (inst == NULL)
874		return;
875
876	inst->ri_flags |= flag;
877
878	MUTEX_UNLOCK(&inst->ri_lock);
879}
880
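/*
 * Walk instance_list and retake the running and/or post-start snapshots for
 * every instance flagged by restarter_mark_pending_snapshot().
 */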
881static void
882restarter_take_pending_snapshots(scf_handle_t *h)
883{
884	restarter_inst_t *inst;
885	int r;
886
887	MUTEX_LOCK(&instance_list.ril_lock);
888
889	for (inst = uu_list_first(instance_list.ril_instance_list);
890	    inst != NULL;
891	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
892		const char *fmri;
893		scf_instance_t *sinst = NULL;
894
895		MUTEX_LOCK(&inst->ri_lock);
896
897		/*
898		 * This is where we'd check inst->ri_method_thread and if it
899		 * were nonzero we'd wait in anticipation of another thread
900		 * executing a method for inst.  Doing so with the instance_list
901		 * locked, though, leads to deadlock.  Since taking a snapshot
902		 * during that window won't hurt anything, we'll just continue.
903		 */
904
905		fmri = inst->ri_i.i_fmri;
906
907		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
908			scf_snapshot_t *rsnap;
909
910			(void) libscf_fmri_get_instance(h, fmri, &sinst);
911
912			rsnap = libscf_get_or_make_running_snapshot(sinst,
913			    fmri, B_FALSE);
914
915			scf_instance_destroy(sinst);
916
917			if (rsnap != NULL)
918				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
919
920			scf_snapshot_destroy(rsnap);
921		}
922
923		if (inst->ri_flags & RINST_RETAKE_START) {
924			switch (r = libscf_snapshots_poststart(h, fmri,
925			    B_FALSE)) {
926			case 0:
927			case ENOENT:
928				inst->ri_flags &= ~RINST_RETAKE_START;
929				break;
930
931			case ECONNABORTED:
932				break;
933
934			case EACCES:
935			default:
936				bad_error("libscf_snapshots_poststart", r);
937			}
938		}
939
940		MUTEX_UNLOCK(&inst->ri_lock);
941	}
942
943	MUTEX_UNLOCK(&instance_list.ril_lock);
944}
945
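/*
 * Single-shot thread: loops calling libscf_create_self() until it succeeds
 * (rebinding on ECONNABORTED), then takes any snapshots deferred via
 * restarter_mark_pending_snapshot().
 */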
946/* ARGSUSED */
947void *
948restarter_post_fsminimal_thread(void *unused)
949{
950	scf_handle_t *h;
951	int r;
952
953	h = libscf_handle_create_bound_loop();
954
955	for (;;) {
956		r = libscf_create_self(h);
957		if (r == 0)
958			break;
959
960		assert(r == ECONNABORTED);
961		libscf_handle_rebind(h);
962	}
963
964	restarter_take_pending_snapshots(h);
965
966	(void) scf_handle_unbind(h);
967	scf_handle_destroy(h);
968
969	return (NULL);
970}
971
972/*
973 * int stop_instance()
974 *
975 *   Stop the instance given as the second argument, for the cause given as
976 *   the third.
977 *
978 *   Returns
979 *     0 - success
980 *     -1 - inst is in transition
981 */
982static int
983stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
984    stop_cause_t cause)
985{
986	fork_info_t *info;
987	const char *cp;
988	int err;
989	restarter_error_t re;
990
991	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
992	assert(inst->ri_method_thread == 0);
993
994	switch (cause) {
995	case RSTOP_EXIT:
996		re = RERR_RESTART;
997		cp = "all processes in service exited";
998		break;
999	case RSTOP_CORE:
1000		re = RERR_FAULT;
1001		cp = "process dumped core";
1002		break;
1003	case RSTOP_SIGNAL:
1004		re = RERR_FAULT;
1005		cp = "process received fatal signal from outside the service";
1006		break;
1007	case RSTOP_HWERR:
1008		re = RERR_FAULT;
1009		cp = "process killed due to uncorrectable hardware error";
1010		break;
1011	case RSTOP_DEPENDENCY:
1012		re = RERR_RESTART;
1013		cp = "dependency activity requires stop";
1014		break;
1015	case RSTOP_DISABLE:
1016		re = RERR_RESTART;
1017		cp = "service disabled";
1018		break;
1019	case RSTOP_RESTART:
1020		re = RERR_RESTART;
1021		cp = "service restarting";
1022		break;
1023	default:
1024#ifndef NDEBUG
1025		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
1026		    cause, __FILE__, __LINE__);
1027#endif
1028		abort();
1029	}
1030
1031	/* Services in the disabled and maintenance state are ignored */
1032	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1033	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
1034		log_framework(LOG_DEBUG,
1035		    "%s: stop_instance -> is maint/disabled\n",
1036		    inst->ri_i.i_fmri);
1037		return (0);
1038	}
1039
1040	/* Already stopped instances are left alone */
1041	if (instance_started(inst) == 0) {
1042		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
1043		    inst->ri_i.i_fmri);
1044		return (0);
1045	}
1046
1047	if (instance_in_transition(inst)) {
1048		/* requeue event by returning -1 */
1049		log_framework(LOG_DEBUG,
1050		    "Restarter: Not stopping %s, in transition.\n",
1051		    inst->ri_i.i_fmri);
1052		return (-1);
1053	}
1054
1055	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
1056
1057	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
1058	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
1059
1060	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
1061		/*
1062		 * No need to stop instance, as child has exited; remove
1063		 * contract and move the instance to the offline state.
1064		 */
1065		switch (err = restarter_instance_update_states(local_handle,
1066		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1067		    NULL)) {
1068		case 0:
1069		case ECONNRESET:
1070			break;
1071
1072		default:
1073			bad_error("restarter_instance_update_states", err);
1074		}
1075
1076		(void) update_fault_count(inst, FAULT_COUNT_RESET);
1077
1078		if (inst->ri_i.i_primary_ctid != 0) {
1079			inst->ri_m_inst =
1080			    safe_scf_instance_create(local_handle);
1081			inst->ri_mi_deleted = B_FALSE;
1082
1083			libscf_reget_instance(inst);
1084			method_remove_contract(inst, B_TRUE, B_TRUE);
1085
1086			scf_instance_destroy(inst->ri_m_inst);
1087			inst->ri_m_inst = NULL;
1088		}
1089
1090		switch (err = restarter_instance_update_states(local_handle,
1091		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1092		    NULL)) {
1093		case 0:
1094		case ECONNRESET:
1095			break;
1096
1097		default:
1098			bad_error("restarter_instance_update_states", err);
1099		}
1100
1101		return (0);
1102	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
1103		/*
1104		 * Stopping a wait service through means other than the pid
1105		 * exiting should keep wait_thread() from restarting the
1106		 * service, by removing it from the wait list.
1107		 * We cannot remove it right now, otherwise the process would
1108		 * end up <defunct>, so mark it to be ignored.
1109		 */
1110		wait_ignore_by_fmri(inst->ri_i.i_fmri);
1111	}
1112
1113	switch (err = restarter_instance_update_states(local_handle, inst,
1114	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1115	    RESTARTER_STATE_DISABLED, RERR_NONE, NULL)) {
1116	case 0:
1117	case ECONNRESET:
1118		break;
1119
1120	default:
1121		bad_error("restarter_instance_update_states", err);
1122	}
1123
1124	info = startd_zalloc(sizeof (fork_info_t));
1125
1126	info->sf_id = inst->ri_id;
1127	info->sf_method_type = METHOD_STOP;
1128	info->sf_event_type = re;
1129	inst->ri_method_thread = startd_thread_create(method_thread, info);
1130
1131	return (0);
1132}
1133
1134/*
1135 * Returns
1136 *   ENOENT - fmri is not in instance_list
1137 *   0 - success
1138 *   ECONNRESET - success, though handle was rebound
1139 *   -1 - instance is in transition
1140 */
1141int
1142stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
1143{
1144	restarter_inst_t *rip;
1145	int r;
1146
1147	rip = inst_lookup_by_name(fmri);
1148	if (rip == NULL)
1149		return (ENOENT);
1150
1151	r = stop_instance(h, rip, flags);
1152
1153	MUTEX_UNLOCK(&rip->ri_lock);
1154
1155	return (r);
1156}
1157
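/*
 * Take rip out of the maintenance state (back to uninitialized), abandoning
 * and removing any leftover primary contract.
 */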
1158static void
1159unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
1160    unmaint_cause_t cause)
1161{
1162	ctid_t ctid;
1163	scf_instance_t *inst;
1164	int r;
1165	uint_t tries = 0, msecs = ALLOC_DELAY;
1166	const char *cp;
1167
1168	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1169
1170	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
1171		log_error(LOG_DEBUG, "Restarter: "
1172		    "Ignoring maintenance off command because %s is not in the "
1173		    "maintenance state.\n", rip->ri_i.i_fmri);
1174		return;
1175	}
1176
1177	switch (cause) {
1178	case RUNMAINT_CLEAR:
1179		cp = "clear requested";
1180		break;
1181	case RUNMAINT_DISABLE:
1182		cp = "disable requested";
1183		break;
1184	default:
1185#ifndef NDEBUG
1186		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
1187		    cause, __FILE__, __LINE__);
1188#endif
1189		abort();
1190	}
1191
1192	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
1193	    cp);
1194	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
1195	    "%s.\n", rip->ri_i.i_fmri, cp);
1196
1197	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1198	    RESTARTER_STATE_NONE, RERR_RESTART, "none");
1199
1200	/*
1201	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
1202	 * a primary contract.
1203	 */
1204	if (rip->ri_i.i_primary_ctid == 0)
1205		return;
1206
1207	ctid = rip->ri_i.i_primary_ctid;
1208	contract_abandon(ctid);
1209	rip->ri_i.i_primary_ctid = 0;
1210
1211rep_retry:
1212	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
1213	case 0:
1214		break;
1215
1216	case ECONNABORTED:
1217		libscf_handle_rebind(h);
1218		goto rep_retry;
1219
1220	case ENOENT:
1221		/* Must have been deleted. */
1222		return;
1223
1224	case EINVAL:
1225	case ENOTSUP:
1226	default:
1227		bad_error("libscf_fmri_get_instance", r);
1228	}
1229
1230again:
1231	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
1232	switch (r) {
1233	case 0:
1234		break;
1235
1236	case ENOMEM:
1237		++tries;
1238		if (tries < ALLOC_RETRY) {
1239			(void) poll(NULL, 0, msecs);
1240			msecs *= ALLOC_DELAY_MULT;
1241			goto again;
1242		}
1243
1244		uu_die("Insufficient memory.\n");
1245		/* NOTREACHED */
1246
1247	case ECONNABORTED:
1248		scf_instance_destroy(inst);
1249		libscf_handle_rebind(h);
1250		goto rep_retry;
1251
1252	case ECANCELED:
1253		break;
1254
1255	case EPERM:
1256	case EACCES:
1257	case EROFS:
1258		log_error(LOG_INFO,
1259		    "Could not remove contract id %lu for %s (%s).\n", ctid,
1260		    rip->ri_i.i_fmri, strerror(r));
1261		break;
1262
1263	case EINVAL:
1264	case EBADF:
1265	default:
1266		bad_error("restarter_remove_contract", r);
1267	}
1268
1269	scf_instance_destroy(inst);
1270}
1271
1272/*
1273 * enable_inst()
1274 *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
1275 *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
1276 *   disabled, move it to offline.  If the event is _DISABLE or
1277 *   _ADMIN_DISABLE, make sure inst will move to disabled.
1278 *
1279 *   Returns
1280 *     0 - success
1281 *     ECONNRESET - h was rebound
1282 */
1283static int
1284enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
1285{
1286	restarter_instance_state_t state;
1287	int r;
1288
1289	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1290	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
1291	    e == RESTARTER_EVENT_TYPE_DISABLE ||
1292	    e == RESTARTER_EVENT_TYPE_ENABLE);
1293	assert(instance_in_transition(inst) == 0);
1294
1295	state = inst->ri_i.i_state;
1296
1297	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
1298		inst->ri_i.i_enabled = 1;
1299
1300		if (state == RESTARTER_STATE_UNINIT ||
1301		    state == RESTARTER_STATE_DISABLED) {
1302			/*
1303			 * B_FALSE: Don't log an error if the log_instance()
1304			 * fails because it will fail on the miniroot before
1305			 * install-discovery runs.
1306			 */
1307			log_instance(inst, B_FALSE, "Enabled.");
1308			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
1309			    inst->ri_i.i_fmri);
1310			(void) restarter_instance_update_states(h, inst,
1311			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1312			    RERR_NONE, NULL);
1313		} else {
1314			log_framework(LOG_DEBUG, "Restarter: "
1315			    "Not changing state of %s for enable command.\n",
1316			    inst->ri_i.i_fmri);
1317		}
1318	} else {
1319		inst->ri_i.i_enabled = 0;
1320
1321		switch (state) {
1322		case RESTARTER_STATE_ONLINE:
1323		case RESTARTER_STATE_DEGRADED:
1324			r = stop_instance(h, inst, RSTOP_DISABLE);
1325			return (r == ECONNRESET ? 0 : r);
1326
1327		case RESTARTER_STATE_OFFLINE:
1328		case RESTARTER_STATE_UNINIT:
1329			if (inst->ri_i.i_primary_ctid != 0) {
1330				inst->ri_m_inst = safe_scf_instance_create(h);
1331				inst->ri_mi_deleted = B_FALSE;
1332
1333				libscf_reget_instance(inst);
1334				method_remove_contract(inst, B_TRUE, B_TRUE);
1335
1336				scf_instance_destroy(inst->ri_m_inst);
1337			}
1338			/* B_FALSE: See log_instance(..., "Enabled."); above */
1339			log_instance(inst, B_FALSE, "Disabled.");
1340			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
1341			    inst->ri_i.i_fmri);
1342			(void) restarter_instance_update_states(h, inst,
1343			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1344			    RERR_RESTART, NULL);
1345			return (0);
1346
1347		case RESTARTER_STATE_DISABLED:
1348			break;
1349
1350		case RESTARTER_STATE_MAINT:
1351			/*
1352			 * We only want to pull the instance out of maintenance
1353			 * if the disable is on administrative request.  The
1354			 * graph engine sends _DISABLE events whenever a
1355			 * service isn't in the disabled state, and we don't
1356			 * want to pull the service out of maintenance if,
1357			 * for example, it is there due to a dependency cycle.
1358			 */
1359			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
1360				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
1361			break;
1362
1363		default:
1364#ifndef NDEBUG
1365			(void) fprintf(stderr, "Restarter instance %s has "
1366			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
1367#endif
1368			abort();
1369		}
1370	}
1371
1372	return (0);
1373}
1374
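/*
 * Move an enabled, non-running instance toward online and kick off its start
 * method in a separate method thread.
 */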
1375static void
1376start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
1377{
1378	fork_info_t *info;
1379
1380	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1381	assert(instance_in_transition(inst) == 0);
1382	assert(inst->ri_method_thread == 0);
1383
1384	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
1385	    inst->ri_i.i_fmri);
1386
1387	/* Services in the disabled and maintenance state are ignored */
1388	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
1389	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
1390	    inst->ri_i.i_enabled == 0) {
1391		log_framework(LOG_DEBUG,
1392		    "%s: start_instance -> is maint/disabled\n",
1393		    inst->ri_i.i_fmri);
1394		return;
1395	}
1396
1397	/* Already started instances are left alone */
1398	if (instance_started(inst) == 1) {
1399		log_framework(LOG_DEBUG,
1400		    "%s: start_instance -> is already started\n",
1401		    inst->ri_i.i_fmri);
1402		return;
1403	}
1404
1405	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
1406
1407	(void) restarter_instance_update_states(local_handle, inst,
1408	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, "none");
1409
1410	info = startd_zalloc(sizeof (fork_info_t));
1411
1412	info->sf_id = inst->ri_id;
1413	info->sf_method_type = METHOD_START;
1414	info->sf_event_type = RERR_NONE;
1415	inst->ri_method_thread = startd_thread_create(method_thread, info);
1416}
1417
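/*
 * Helper for choosing between the "service_request" and
 * "administrative_request" auxiliary states: returns the result of
 * restarter_inst_ractions_from_tty() for rip, or -1 if the instance cannot
 * be looked up in the repository.
 */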
1418static int
1419event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
1420{
1421	scf_instance_t *inst;
1422	int ret = 0;
1423
1424	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
1425		return (-1);
1426
1427	ret = restarter_inst_ractions_from_tty(inst);
1428
1429	scf_instance_destroy(inst);
1430	return (ret);
1431}
1432
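/*
 * Place rip in the maintenance state.  If immediate is set or the instance
 * isn't running, any lingering contract is removed and the state is changed
 * directly; otherwise the stop method is run first via a method thread.
 */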
1433static void
1434maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1435    const char *aux)
1436{
1437	fork_info_t *info;
1438	scf_instance_t *scf_inst = NULL;
1439
1440	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1441	assert(aux != NULL);
1442	assert(rip->ri_method_thread == 0);
1443
1444	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
1445	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1446	    rip->ri_i.i_fmri, aux);
1447
1448	/* Services in the maintenance state are ignored */
1449	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
1450		log_framework(LOG_DEBUG,
1451		    "%s: maintain_instance -> is already in maintenance\n",
1452		    rip->ri_i.i_fmri);
1453		return;
1454	}
1455
1456	/*
1457	 * If aux state is "service_request" and
1458	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
1459	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use it.
1460	 */
1461	if (strcmp(aux, "service_request") == 0 && libscf_fmri_get_instance(h,
1462	    rip->ri_i.i_fmri, &scf_inst) == 0) {
1463		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
1464			if (restarter_inst_set_aux_fmri(scf_inst))
1465				log_framework(LOG_DEBUG, "%s: "
1466				    "restarter_inst_set_aux_fmri failed: ",
1467				    rip->ri_i.i_fmri);
1468		} else {
1469			log_framework(LOG_DEBUG, "%s: "
1470			    "restarter_inst_validate_ractions_aux_fmri "
1471			    "failed: ", rip->ri_i.i_fmri);
1472
1473			if (restarter_inst_reset_aux_fmri(scf_inst))
1474				log_framework(LOG_DEBUG, "%s: "
1475				    "restarter_inst_reset_aux_fmri failed: ",
1476				    rip->ri_i.i_fmri);
1477		}
1478		scf_instance_destroy(scf_inst);
1479	}
1480
1481	if (immediate || !instance_started(rip)) {
1482		if (rip->ri_i.i_primary_ctid != 0) {
1483			rip->ri_m_inst = safe_scf_instance_create(h);
1484			rip->ri_mi_deleted = B_FALSE;
1485
1486			libscf_reget_instance(rip);
1487			method_remove_contract(rip, B_TRUE, B_TRUE);
1488
1489			scf_instance_destroy(rip->ri_m_inst);
1490		}
1491
1492		(void) restarter_instance_update_states(h, rip,
1493		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1494		    (char *)aux);
1495		return;
1496	}
1497
1498	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1499	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);
1500
1501	log_transition(rip, MAINT_REQUESTED);
1502
1503	info = startd_zalloc(sizeof (*info));
1504	info->sf_id = rip->ri_id;
1505	info->sf_method_type = METHOD_STOP;
1506	info->sf_event_type = RERR_RESTART;
1507	rip->ri_method_thread = startd_thread_create(method_thread, info);
1508}
1509
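/*
 * Reread rip's configuration from its running snapshot and, if the instance
 * is running, execute its refresh method.
 */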
1510static void
1511refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
1512{
1513	scf_instance_t *inst;
1514	scf_snapshot_t *snap;
1515	fork_info_t *info;
1516	int r;
1517
1518	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
1519
1520	log_instance(rip, B_TRUE, "Rereading configuration.");
1521	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
1522	    rip->ri_i.i_fmri);
1523
1524rep_retry:
1525	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
1526	switch (r) {
1527	case 0:
1528		break;
1529
1530	case ECONNABORTED:
1531		libscf_handle_rebind(h);
1532		goto rep_retry;
1533
1534	case ENOENT:
1535		/* Must have been deleted. */
1536		return;
1537
1538	case EINVAL:
1539	case ENOTSUP:
1540	default:
1541		bad_error("libscf_fmri_get_instance", r);
1542	}
1543
1544	snap = libscf_get_running_snapshot(inst);
1545
1546	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
1547	    &rip->ri_utmpx_prefix);
1548	switch (r) {
1549	case 0:
1550		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
1551		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
1552		break;
1553
1554	case ECONNABORTED:
1555		scf_instance_destroy(inst);
1556		scf_snapshot_destroy(snap);
1557		libscf_handle_rebind(h);
1558		goto rep_retry;
1559
1560	case ECANCELED:
1561	case ENOENT:
1562		/* Succeed in anticipation of REMOVE_INSTANCE. */
1563		break;
1564
1565	default:
1566		bad_error("libscf_get_startd_properties", r);
1567	}
1568
1569	if (instance_started(rip)) {
1570		/* Refresh does not change the state. */
1571		(void) restarter_instance_update_states(h, rip,
1572		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);
1573
1574		info = startd_zalloc(sizeof (*info));
1575		info->sf_id = rip->ri_id;
1576		info->sf_method_type = METHOD_REFRESH;
1577		info->sf_event_type = RERR_REFRESH;
1578
1579		assert(rip->ri_method_thread == 0);
1580		rip->ri_method_thread =
1581		    startd_thread_create(method_thread, info);
1582	}
1583
1584	scf_snapshot_destroy(snap);
1585	scf_instance_destroy(inst);
1586}
1587
1588const char *event_names[] = { "INVALID", "ADD_INSTANCE", "REMOVE_INSTANCE",
1589	"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
1590	"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
1591	"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
1592	"INVALID_DEPENDENCY", "ADMIN_DISABLE"
1593};
1594
1595/*
1596 * void *restarter_process_events()
1597 *
1598 *   Called in a separate thread to process the events on an instance's
1599 *   queue.  Empties the queue completely, and tries to keep the thread
1600 *   around for a little while after the queue is empty to save on
1601 *   startup costs.
1602 */
1603static void *
1604restarter_process_events(void *arg)
1605{
1606	scf_handle_t *h;
1607	restarter_instance_qentry_t *event;
1608	restarter_inst_t *rip;
1609	char *fmri = (char *)arg;
1610	struct timespec to;
1611
1612	assert(fmri != NULL);
1613
1614	h = libscf_handle_create_bound_loop();
1615
1616	/* grab the queue lock */
1617	rip = inst_lookup_queue(fmri);
1618	if (rip == NULL)
1619		goto out;
1620
1621again:
1622
1623	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
1624		restarter_inst_t *inst;
1625
1626		/* drop the queue lock */
1627		MUTEX_UNLOCK(&rip->ri_queue_lock);
1628
1629		/*
1630		 * Grab the inst lock -- this waits until any outstanding
1631		 * method finishes running.
1632		 */
1633		inst = inst_lookup_by_name(fmri);
1634		if (inst == NULL) {
1635			/* Getting deleted in the middle isn't an error. */
1636			goto cont;
1637		}
1638
1639		assert(instance_in_transition(inst) == 0);
1640
1641		/* process the event */
1642		switch (event->riq_type) {
1643		case RESTARTER_EVENT_TYPE_ENABLE:
1644		case RESTARTER_EVENT_TYPE_DISABLE:
1645		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1646			(void) enable_inst(h, inst, event->riq_type);
1647			break;
1648
1649		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
1650			restarter_delete_inst(inst);
1651			inst = NULL;
1652			goto cont;
1653
1654		case RESTARTER_EVENT_TYPE_STOP:
1655			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
1656			break;
1657
1658		case RESTARTER_EVENT_TYPE_START:
1659			start_instance(h, inst);
1660			break;
1661
1662		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1663			maintain_instance(h, inst, 0, "dependency_cycle");
1664			break;
1665
1666		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1667			maintain_instance(h, inst, 0, "invalid_dependency");
1668			break;
1669
1670		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1671			if (event_from_tty(h, inst) == 0)
1672				maintain_instance(h, inst, 0,
1673				    "service_request");
1674			else
1675				maintain_instance(h, inst, 0,
1676				    "administrative_request");
1677			break;
1678
1679		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1680			if (event_from_tty(h, inst) == 0)
1681				maintain_instance(h, inst, 1,
1682				    "service_request");
1683			else
1684				maintain_instance(h, inst, 1,
1685				    "administrative_request");
1686			break;
1687
1688		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1689			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
1690			break;
1691
1692		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1693			refresh_instance(h, inst);
1694			break;
1695
1696		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1697			log_framework(LOG_WARNING, "Restarter: "
1698			    "%s command (for %s) unimplemented.\n",
1699			    event_names[event->riq_type], inst->ri_i.i_fmri);
1700			break;
1701
1702		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1703			if (!instance_started(inst)) {
1704				log_framework(LOG_DEBUG, "Restarter: "
1705				    "Not restarting %s; not running.\n",
1706				    inst->ri_i.i_fmri);
1707			} else {
1708				/*
1709				 * Stop the instance.  If it can be restarted,
1710				 * the graph engine will send a new event.
1711				 */
1712				(void) stop_instance(h, inst, RSTOP_RESTART);
1713			}
1714			break;
1715
1716		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1717		default:
1718#ifndef NDEBUG
1719			uu_warn("%s:%d: Bad restarter event %d.  "
1720			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
1721#endif
1722			abort();
1723		}
1724
1725		assert(inst != NULL);
1726		MUTEX_UNLOCK(&inst->ri_lock);
1727
1728cont:
1729		/* grab the queue lock */
1730		rip = inst_lookup_queue(fmri);
1731		if (rip == NULL)
1732			goto out;
1733
1734		/* delete the event */
1735		uu_list_remove(rip->ri_queue, event);
1736		startd_free(event, sizeof (restarter_instance_qentry_t));
1737	}
1738
1739	assert(rip != NULL);
1740
1741	/*
1742	 * Try to preserve the thread for a little while for future use.
1743	 */
1744	to.tv_sec = 3;
1745	to.tv_nsec = 0;
1746	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
1747	    &rip->ri_queue_lock, &to);
1748
1749	if (uu_list_first(rip->ri_queue) != NULL)
1750		goto again;
1751
1752	rip->ri_queue_thread = 0;
1753	MUTEX_UNLOCK(&rip->ri_queue_lock);
1754out:
1755	(void) scf_handle_unbind(h);
1756	scf_handle_destroy(h);
1757	free(fmri);
1758	return (NULL);
1759}
1760
1761static int
1762is_admin_event(restarter_event_type_t t)
1763{
1764	switch (t) {
1765	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
1766	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
1767	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
1768	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
1769	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
1770	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
1771		return (1);
1772	default:
1773		return (0);
1774	}
1775}
1776
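/*
 * Append event e to ri's per-instance event queue.  The caller holds
 * ri_queue_lock but must not hold ri_lock.
 */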
1777static void
1778restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
1779{
1780	restarter_instance_qentry_t *qe;
1781	int r;
1782
1783	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
1784	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));
1785
1786	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
1787	qe->riq_type = e->rpe_type;
1788
1789	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
1790	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
1791	assert(r == 0);
1792}
1793
1794/*
1795 * void *restarter_event_thread()
1796 *
1797 *  Handle incoming graph events by placing them on a per-instance
1798 *  queue.  We can't lock the main part of the instance structure, so
1799 *  just modify the separately locked event queue portion.
1800 */
1801/*ARGSUSED*/
1802static void *
1803restarter_event_thread(void *unused)
1804{
1805	scf_handle_t *h;
1806
1807	/*
1808	 * This is a new thread, and thus, gets its own handle
1809	 * to the repository.
1810	 */
1811	h = libscf_handle_create_bound_loop();
1812
1813	MUTEX_LOCK(&ru->restarter_update_lock);
1814
1815	/*CONSTCOND*/
1816	while (1) {
1817		restarter_protocol_event_t *e;
1818
1819		while (ru->restarter_update_wakeup == 0)
1820			(void) pthread_cond_wait(&ru->restarter_update_cv,
1821			    &ru->restarter_update_lock);
1822
1823		ru->restarter_update_wakeup = 0;
1824
1825		while ((e = restarter_event_dequeue()) != NULL) {
1826			restarter_inst_t *rip;
1827			char *fmri;
1828
1829			MUTEX_UNLOCK(&ru->restarter_update_lock);
1830
1831			/*
1832			 * ADD_INSTANCE is special: there's likely no
1833			 * instance structure yet, so we need to handle the
1834			 * addition synchronously.
1835			 */
1836			switch (e->rpe_type) {
1837			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
1838				if (restarter_insert_inst(h, e->rpe_inst) != 0)
1839					log_error(LOG_INFO, "Restarter: "
1840					    "Could not add %s.\n", e->rpe_inst);
1841
1842				MUTEX_LOCK(&st->st_load_lock);
1843				if (--st->st_load_instances == 0)
1844					(void) pthread_cond_broadcast(
1845					    &st->st_load_cv);
1846				MUTEX_UNLOCK(&st->st_load_lock);
1847
1848				goto nolookup;
1849			}
1850
1851			/*
1852			 * Lookup the instance, locking only the event queue.
1853			 * Can't grab ri_lock here because it might be held
1854			 * by a long-running method.
1855			 */
1856			rip = inst_lookup_queue(e->rpe_inst);
1857			if (rip == NULL) {
1858				log_error(LOG_INFO, "Restarter: "
1859				    "Ignoring %s command for unknown service "
1860				    "%s.\n", event_names[e->rpe_type],
1861				    e->rpe_inst);
1862				goto nolookup;
1863			}
1864
1865			/* Keep ADMIN events from filling up the queue. */
1866			if (is_admin_event(e->rpe_type) &&
1867			    uu_list_numnodes(rip->ri_queue) >
1868			    RINST_QUEUE_THRESHOLD) {
1869				MUTEX_UNLOCK(&rip->ri_queue_lock);
1870				log_instance(rip, B_TRUE, "Instance event "
1871				    "queue overflow.  Dropping administrative "
1872				    "request.");
1873				log_framework(LOG_DEBUG, "%s: Instance event "
1874				    "queue overflow.  Dropping administrative "
1875				    "request.\n", rip->ri_i.i_fmri);
1876				goto nolookup;
1877			}
1878
1879			/* Now add the event to the instance queue. */
1880			restarter_queue_event(rip, e);
1881
1882			if (rip->ri_queue_thread == 0) {
1883				/*
1884				 * Start a thread if one isn't already
1885				 * running.
1886				 */
1887				fmri = safe_strdup(e->rpe_inst);
1888				rip->ri_queue_thread =  startd_thread_create(
1889				    restarter_process_events, (void *)fmri);
1890			} else {
1891				/*
1892				 * Signal the existing thread that there's
1893				 * a new event.
1894				 */
1895				(void) pthread_cond_broadcast(
1896				    &rip->ri_queue_cv);
1897			}
1898
1899			MUTEX_UNLOCK(&rip->ri_queue_lock);
1900nolookup:
1901			restarter_event_release(e);
1902
1903			MUTEX_LOCK(&ru->restarter_update_lock);
1904		}
1905	}
1906
1907	/*
1908	 * Unreachable for now -- there's currently no graceful cleanup
1909	 * called on exit().
1910	 */
1911	(void) scf_handle_unbind(h);
1912	scf_handle_destroy(h);
1913	return (NULL);
1914}
1915
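/*
 * Map a contract id to its owning instance, returned with ri_lock held.
 * Returns NULL if the contract is unknown or is no longer the instance's
 * primary contract.
 */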
1916static restarter_inst_t *
1917contract_to_inst(ctid_t ctid)
1918{
1919	restarter_inst_t *inst;
1920	int id;
1921
1922	id = lookup_inst_by_contract(ctid);
1923	if (id == -1)
1924		return (NULL);
1925
1926	inst = inst_lookup_by_id(id);
1927	if (inst != NULL) {
1928		/*
1929		 * Since ri_lock isn't held by the contract id lookup, this
1930		 * instance may have been restarted and now be in a new
1931		 * contract, making the old contract no longer valid for this
1932		 * instance.
1933		 */
1934		if (ctid != inst->ri_i.i_primary_ctid) {
1935			MUTEX_UNLOCK(&inst->ri_lock);
1936			inst = NULL;
1937		}
1938	}
1939	return (inst);
1940}
1941
1942/*
1943 * void contract_action()
1944 *   Take action on contract events.
1945 */
1946static void
1947contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
1948    uint32_t type)
1949{
1950	const char *fmri = inst->ri_i.i_fmri;
1951
1952	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
1953
1954	/*
1955	 * If startd has stopped this contract, there is no need to
1956	 * stop it again.
1957	 */
1958	if (inst->ri_i.i_primary_ctid > 0 &&
1959	    inst->ri_i.i_primary_ctid_stopped)
1960		return;
1961
1962	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
1963	    | CT_PR_EV_HWERR)) == 0) {
1964		/*
1965		 * There shouldn't be other events, since that's not how we set
1966		 * the terms. Thus, just log an error and drive on.
1967		 */
1968		log_framework(LOG_NOTICE,
1969		    "%s: contract %ld received unexpected critical event "
1970		    "(%d)\n", fmri, id, type);
1971		return;
1972	}
1973
1974	assert(instance_in_transition(inst) == 0);
1975
1976	if (instance_is_wait_style(inst)) {
1977		/*
1978		 * We ignore all events; if they impact the
1979		 * process we're monitoring, then the
1980		 * wait_thread will stop the instance.
1981		 */
1982		log_framework(LOG_DEBUG,
1983		    "%s: ignoring contract event on wait-style service\n",
1984		    fmri);
1985	} else {
1986		/*
1987		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
1988		 */
1989		switch (type) {
1990		case CT_PR_EV_EMPTY:
1991			(void) stop_instance(h, inst, RSTOP_EXIT);
1992			break;
1993		case CT_PR_EV_CORE:
1994			(void) stop_instance(h, inst, RSTOP_CORE);
1995			break;
1996		case CT_PR_EV_SIGNAL:
1997			(void) stop_instance(h, inst, RSTOP_SIGNAL);
1998			break;
1999		case CT_PR_EV_HWERR:
2000			(void) stop_instance(h, inst, RSTOP_HWERR);
2001			break;
2002		}
2003	}
2004}
2005
2006/*
2007 * void *restarter_contract_event_thread(void *)
2008 *   Listens to the process contract bundle for critical events, taking action
2009 *   on events from contracts we know we are responsible for.
2010 */
2011/*ARGSUSED*/
2012static void *
2013restarter_contracts_event_thread(void *unused)
2014{
2015	int fd, err;
2016	scf_handle_t *local_handle;
2017
2018	/*
2019	 * Await graph load completion.  That is, stop here until we've scanned
2020	 * the repository for contract-instance associations.
2021	 */
2022	MUTEX_LOCK(&st->st_load_lock);
2023	while (!(st->st_load_complete && st->st_load_instances == 0))
2024		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
2025	MUTEX_UNLOCK(&st->st_load_lock);
2026
2027	/*
2028	 * This is a new thread, and thus gets its own handle
2029	 * to the repository.
2030	 */
2031	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
2032		uu_die("Unable to bind a new repository handle: %s\n",
2033		    scf_strerror(scf_error()));
2034
2035	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
2036	if (fd == -1)
2037		uu_die("process bundle open failed");
2038
2039	/*
2040	 * Make sure we get all events (including those generated by configd
2041	 * before this thread was started).
2042	 */
2043	err = ct_event_reset(fd);
2044	assert(err == 0);
2045
2046	for (;;) {
2047		int efd, sfd;
2048		ct_evthdl_t ev;
2049		uint32_t type;
2050		ctevid_t evid;
2051		ct_stathdl_t status;
2052		ctid_t ctid;
2053		restarter_inst_t *inst;
2054		uint64_t cookie;
2055
2056		if (err = ct_event_read_critical(fd, &ev)) {
2057			log_error(LOG_WARNING,
2058			    "Error reading next contract event: %s",
2059			    strerror(err));
2060			continue;
2061		}
2062
2063		evid = ct_event_get_evid(ev);
2064		ctid = ct_event_get_ctid(ev);
2065		type = ct_event_get_type(ev);
2066
2067		/* Fetch cookie. */
2068		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
2069		    < 0) {
2070			ct_event_free(ev);
2071			continue;
2072		}
2073
2074		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
2075			log_framework(LOG_WARNING, "Could not get status for "
2076			    "contract %ld: %s\n", ctid, strerror(err));
2077
2078			startd_close(sfd);
2079			ct_event_free(ev);
2080			continue;
2081		}
2082
2083		cookie = ct_status_get_cookie(status);
2084
2085		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
2086		    "cookie %lld\n", type, ctid, cookie);
2087
2088		ct_status_free(status);
2089
2090		startd_close(sfd);
2091
2092		/*
2093		 * svc.configd(1M) restart handling is performed by the
2094		 * fork_configd_thread.  We don't acknowledge, as that thread
2095		 * will do so.
2096		 */
2097		if (cookie == CONFIGD_COOKIE) {
2098			ct_event_free(ev);
2099			continue;
2100		}
2101
2102		inst = NULL;
2103		if (storing_contract != 0 &&
2104		    (inst = contract_to_inst(ctid)) == NULL) {
2105			/*
2106			 * This can happen for two reasons:
2107			 * - method_run() has not yet stored the
2108			 *    contract into the internal hash table.
2109			 * - we receive an EMPTY event for an abandoned
2110			 *    contract.
2111			 * If there is any contract in the process of
2112			 * being stored into the hash table, then re-read
2113			 * the event later.
2114			 */
2115			log_framework(LOG_DEBUG,
2116			    "Reset event %d for unknown "
2117			    "contract id %ld\n", type, ctid);
2118
2119			/* Throttle: wait 100ms before re-reading the event. */
2120			(void) poll(NULL, 0, 100);
2121
2122			(void) ct_event_reset(fd);
2123			ct_event_free(ev);
2124			continue;
2125		}
2126
2127		/*
2128		 * Do not call contract_to_inst() again if the first
2129		 * call succeeded.
2130		 */
2131		if (inst == NULL)
2132			inst = contract_to_inst(ctid);
2133		if (inst == NULL) {
2134			/*
2135			 * This can happen if we receive an EMPTY
2136			 * event for an abandoned contract.
2137			 */
2138			log_framework(LOG_DEBUG,
2139			    "Received event %d for unknown contract id "
2140			    "%ld\n", type, ctid);
2141		} else {
2142			log_framework(LOG_DEBUG,
2143			    "Received event %d for contract id "
2144			    "%ld (%s)\n", type, ctid,
2145			    inst->ri_i.i_fmri);
2146
2147			contract_action(local_handle, inst, ctid, type);
2148
2149			MUTEX_UNLOCK(&inst->ri_lock);
2150		}
2151
2152		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
2153		    O_WRONLY);
2154		if (efd != -1) {
2155			(void) ct_ctl_ack(efd, evid);
2156			startd_close(efd);
2157		}
2158
2159		ct_event_free(ev);
2160
2161	}
2162
2163	/*NOTREACHED*/
2164	return (NULL);
2165}
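/*
 * Illustrative note (added commentary, not in the original source): the
 * retry path above leans on contract event semantics.  A critical event
 * that has not been acknowledged with ct_ctl_ack() remains on the
 * bundle's queue, and ct_event_reset() moves the listener back to the
 * start of the queue, so the same event is returned by a later
 * ct_event_read_critical().  Events that are handled, or that cannot be
 * mapped to any instance, are acknowledged on the contract's ctl
 * endpoint and are not seen again.
 */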
2166
2167/*
2168 * Timeout queue, processed by restarter_timeouts_event_thread().
2169 */
2170timeout_queue_t *timeouts;
2171static uu_list_pool_t *timeout_pool;
2172
2173typedef struct timeout_update {
2174	pthread_mutex_t		tu_lock;
2175	pthread_cond_t		tu_cv;
2176	int			tu_wakeup;
2177} timeout_update_t;
2178
2179timeout_update_t *tu;
2180
2181static const char *timeout_ovr_svcs[] = {
2182	"svc:/system/manifest-import:default",
2183	"svc:/network/initial:default",
2184	"svc:/network/service:default",
2185	"svc:/system/rmtmpfiles:default",
2186	"svc:/network/loopback:default",
2187	"svc:/network/physical:default",
2188	"svc:/system/device/local:default",
2189	"svc:/system/metainit:default",
2190	"svc:/system/filesystem/usr:default",
2191	"svc:/system/filesystem/minimal:default",
2192	"svc:/system/filesystem/local:default",
2193	NULL
2194};
2195
2196int
2197is_timeout_ovr(restarter_inst_t *inst)
2198{
2199	int i;
2200
2201	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
2202		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
2203			log_instance(inst, B_TRUE, "Timeout override by "
2204			    "svc.startd.  Using infinite timeout.");
2205			return (1);
2206		}
2207	}
2208
2209	return (0);
2210}
2211
2212/*ARGSUSED*/
2213static int
2214timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
2215{
2216	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
2217	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
2218
2219	if (t1 > t2)
2220		return (1);
2221	else if (t1 < t2)
2222		return (-1);
2223	return (0);
2224}
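/*
 * Illustrative note (added commentary, not in the original source): the
 * timeout queue below is created UU_LIST_SORTED with this comparator, so
 * entries are kept in ascending deadline order and uu_list_first()
 * always yields the earliest deadline.  For example, with deadlines
 * queued at now+5s, now+30s and now+300s, a timeout_now() pass made 10
 * seconds later fires only the first entry and stops walking at the
 * second.
 */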
2225
2226void
2227timeout_init()
2228{
2229	timeouts = startd_zalloc(sizeof (timeout_queue_t));
2230
2231	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
2232
2233	timeout_pool = startd_list_pool_create("timeouts",
2234	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
2235	    timeout_compare, UU_LIST_POOL_DEBUG);
2236	assert(timeout_pool != NULL);
2237
2238	timeouts->tq_list = startd_list_create(timeout_pool,
2239	    timeouts, UU_LIST_SORTED);
2240	assert(timeouts->tq_list != NULL);
2241
2242	tu = startd_zalloc(sizeof (timeout_update_t));
2243	(void) pthread_cond_init(&tu->tu_cv, NULL);
2244	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
2245}
2246
2247void
2248timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
2249{
2250	hrtime_t now, timeout;
2251	timeout_entry_t *entry;
2252	uu_list_index_t idx;
2253
2254	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2255
2256	now = gethrtime();
2257
2258	/*
2259	 * If the timeout would overflow LLONG_MAX, we're never timing out
2260	 * anyway, so just return.
2261	 */
2262	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
2263		log_instance(inst, B_TRUE, "timeout_seconds too large, "
2264		    "treating as infinite.");
2265		return;
2266	}
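	/*
	 * Illustrative note (added commentary, not in the original
	 * source): LLONG_MAX is roughly 9.2e18 and hrtime counts
	 * nanoseconds, so the guard above only trips for timeouts on
	 * the order of 9.2e9 seconds (about 290 years) or more;
	 * treating such values as infinite loses nothing in practice.
	 */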
2267
2268	/* hrtime is in nanoseconds. Convert timeout_sec. */
2269	timeout = now + (timeout_sec * 1000000000LL);
2270
2271	entry = startd_alloc(sizeof (timeout_entry_t));
2272	entry->te_timeout = timeout;
2273	entry->te_ctid = cid;
2274	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
2275	entry->te_logstem = safe_strdup(inst->ri_logstem);
2276	entry->te_fired = 0;
2277	/* Insert the calculated timeout time onto the queue. */
2278	MUTEX_LOCK(&timeouts->tq_lock);
2279	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
2280	uu_list_node_init(entry, &entry->te_link, timeout_pool);
2281	uu_list_insert(timeouts->tq_list, entry, idx);
2282	MUTEX_UNLOCK(&timeouts->tq_lock);
2283
2284	assert(inst->ri_timeout == NULL);
2285	inst->ri_timeout = entry;
2286
2287	MUTEX_LOCK(&tu->tu_lock);
2288	tu->tu_wakeup = 1;
2289	(void) pthread_cond_broadcast(&tu->tu_cv);
2290	MUTEX_UNLOCK(&tu->tu_lock);
2291}
2292
2293
2294void
2295timeout_remove(restarter_inst_t *inst, ctid_t cid)
2296{
2297	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
2298
2299	if (inst->ri_timeout == NULL)
2300		return;
2301
2302	assert(inst->ri_timeout->te_ctid == cid);
2303
2304	MUTEX_LOCK(&timeouts->tq_lock);
2305	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
2306	MUTEX_UNLOCK(&timeouts->tq_lock);
2307
2308	free(inst->ri_timeout->te_fmri);
2309	free(inst->ri_timeout->te_logstem);
2310	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
2311	inst->ri_timeout = NULL;
2312}
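/*
 * Illustrative calling pattern (added commentary, not in the original
 * source; the actual callers live in the method-execution code and may
 * differ -- method_ctid and timeout_sec below are placeholders):
 *
 *	MUTEX_LOCK(&inst->ri_lock);
 *	timeout_insert(inst, method_ctid, timeout_sec);
 *	MUTEX_UNLOCK(&inst->ri_lock);
 *
 *	... run the method and wait for it ...
 *
 *	MUTEX_LOCK(&inst->ri_lock);
 *	timeout_remove(inst, method_ctid);
 *	MUTEX_UNLOCK(&inst->ri_lock);
 *
 * timeout_remove() is a no-op when no timeout is queued for the
 * instance, so it is safe to call unconditionally on the completion
 * path.
 */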
2313
2314static int
2315timeout_now()
2316{
2317	timeout_entry_t *e;
2318	hrtime_t now;
2319	int ret;
2320
2321	now = gethrtime();
2322
2323	/*
2324	 * Walk through the (sorted) timeouts list.  While the timeout
2325	 * at the head of the list is <= the current time, kill the
2326	 * method.
2327	 */
2328	MUTEX_LOCK(&timeouts->tq_lock);
2329
2330	for (e = uu_list_first(timeouts->tq_list);
2331	    e != NULL && e->te_timeout <= now;
2332	    e = uu_list_next(timeouts->tq_list, e)) {
2333		log_framework(LOG_WARNING, "%s: Method or service exit timed "
2334		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
2335		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
2336		    "Method or service exit timed out.  Killing contract %ld.",
2337		    e->te_ctid);
2338		e->te_fired = 1;
2339		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
2340	}
2341
2342	if (uu_list_numnodes(timeouts->tq_list) > 0)
2343		ret = 0;
2344	else
2345		ret = -1;
2346
2347	MUTEX_UNLOCK(&timeouts->tq_lock);
2348
2349	return (ret);
2350}
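/*
 * Illustrative note (added commentary, not in the original source): a
 * fired entry is only marked via te_fired and its contract killed; it
 * stays on tq_list until timeout_remove() is called for the instance.
 * That is why the return value depends solely on whether the list is
 * empty: the timeout thread keeps polling once a second while anything,
 * fired or not, remains queued, and only blocks on tu_cv once the queue
 * drains.
 */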
2351
2352/*
2353 * void *restarter_timeouts_event_thread(void *)
2354 *   Responsible for monitoring the method timeouts.  This thread must
2355 *   be started before any methods are called.
2356 */
2357/*ARGSUSED*/
2358static void *
2359restarter_timeouts_event_thread(void *unused)
2360{
2361	/*
2362	 * Timeouts are entered on a priority queue, which is processed by
2363	 * this thread.  As timeouts are specified in seconds, we'll do
2364	 * the necessary processing every second, as long as the queue
2365	 * is not empty.
2366	 */
2367
2368	/*CONSTCOND*/
2369	while (1) {
2370		/*
2371		 * As long as the timeout list isn't empty, process it
2372		 * every second.
2373		 */
2374		if (timeout_now() == 0) {
2375			(void) sleep(1);
2376			continue;
2377		}
2378
2379		/* The list is empty, wait until we have more timeouts. */
2380		MUTEX_LOCK(&tu->tu_lock);
2381
2382		while (tu->tu_wakeup == 0)
2383			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
2384
2385		tu->tu_wakeup = 0;
2386		MUTEX_UNLOCK(&tu->tu_lock);
2387	}
2388
2389	return (NULL);
2390}
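/*
 * Illustrative note (added commentary, not in the original source): the
 * tu_wakeup flag is what makes the handoff from timeout_insert()
 * race-free.  If timeout_insert() broadcasts tu_cv while this thread is
 * still between timeout_now() and pthread_cond_wait(), the broadcast
 * alone would be lost; but because tu_wakeup was set to 1 under tu_lock,
 * the while loop above falls straight through and the new entry is
 * picked up on the next pass:
 *
 *	timeouts thread			timeout_insert()
 *	-------------------------	-------------------------
 *	timeout_now() returns -1
 *					lock tu_lock
 *					tu_wakeup = 1
 *					broadcast tu_cv (no waiter yet)
 *					unlock tu_lock
 *	lock tu_lock
 *	tu_wakeup != 0, so no wait
 *	tu_wakeup = 0; unlock tu_lock
 *	next iteration's timeout_now()
 *	  sees the new entry
 */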
2391
2392void
2393restarter_start()
2394{
2395	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
2396	(void) startd_thread_create(restarter_event_thread, NULL);
2397	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
2398	(void) startd_thread_create(wait_thread, NULL);
2399}
2400
2401
2402void
2403restarter_init()
2404{
2405	restarter_instance_pool = startd_list_pool_create("restarter_instances",
2406	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
2407	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
2408	(void) memset(&instance_list, 0, sizeof (instance_list));
2409
2410	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
2411	instance_list.ril_instance_list = startd_list_create(
2412	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
2413
2414	restarter_queue_pool = startd_list_pool_create(
2415	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
2416	    offsetof(restarter_instance_qentry_t, riq_link), NULL,
2417	    UU_LIST_POOL_DEBUG);
2418
2419	contract_list_pool = startd_list_pool_create(
2420	    "contract_list", sizeof (contract_entry_t),
2421	    offsetof(contract_entry_t, ce_link), NULL,
2422	    UU_LIST_POOL_DEBUG);
2423	contract_hash_init();
2424
2425	log_framework(LOG_DEBUG, "Initialized restarter\n");
2426}
2427