inetd.c revision 3175:5903f61aa150
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * NOTES: To be expanded.
30 *
31 * The SMF inetd.
32 *
33 * Below are some high level notes of the operation of the SMF inetd. The
34 * notes don't go into any real detail, and the viewer of this file is
35 * encouraged to look at the code and its associated comments to better
36 * understand inetd's operation. This saves the potential for the code
37 * and these notes diverging over time.
38 *
39 * Inetd's major work is done from the context of event_loop(). Within this
40 * loop, inetd polls for events arriving from a number of different file
41 * descriptors, representing the following event types, and initiates
42 * any necessary event processing:
43 * - incoming network connections/datagrams.
44 * - notification of terminated processes (discovered via contract events).
45 * - instance specific events originating from the SMF master restarter.
46 * - stop/refresh requests from the inetd method processes (coming in on a
47 *   Unix Domain socket).
48 * There's also a timeout set for the poll, which is set to the nearest
49 * scheduled timer in a timer queue that inetd uses to perform delayed
50 * processing, such as bind retries.
51 * The SIGHUP and SIGINT signals can also interrupt the poll, and will
52 * result in inetd being refreshed or stopped respectively, as was the
53 * behavior with the old inetd.
54 *
55 * Inetd implements a state machine for each instance. The states within the
56 * machine are: offline, online, disabled, maintenance, uninitialized and
57 * specializations of the offline state for when an instance exceeds one of
58 * its DOS limits. The state of an instance can be changed as a
59 * result/side-effect of one of the above events occurring, or inetd being
60 * started up. The ongoing state of an instance is stored in the SMF
61 * repository, as required of SMF restarters. This enables an administrator
62 * to view the state of each instance, and, if inetd was to terminate
63 * unexpectedly, it could use the stored state to re-commence where it left off.
64 *
65 * Within the state machine a number of methods are run (if provided) as part
66 * of a state transition to aid/ effect a change in an instance's state. The
67 * supported methods are: offline, online, disable, refresh and start. The
68 * latter of these is the equivalent of the server program and its arguments
69 * in the old inetd.
70 *
 * Events from the SMF master restarter come in on a number of threads
 * created in the registration routine of librestart, the delegated restarter
 * library. These threads call into the restarter_event_proxy() function
 * when an event arrives. To serialize the processing of instances, these events
 * are then written down a pipe to the process's main thread, which listens
 * for these events via a poll call, with the file descriptor of the other
 * end of the pipe in its read set, and processes the event appropriately.
 * When the event has been processed (which may be delayed if the instance
 * which the event is for is in the process of executing one of its methods
 * as part of a state transition) it writes an acknowledgement back down the
 * pipe the event was received on. The thread in restarter_event_proxy() that
 * wrote the event will read the acknowledgement it was blocked upon, and will
 * then be able to return to its caller, thus implicitly acknowledging the
 * event, and allowing another event to be written down the pipe for the main
 * thread to process.
86 */
87
88
89#include <netdb.h>
90#include <stdio.h>
91#include <stdio_ext.h>
92#include <stdlib.h>
93#include <strings.h>
94#include <unistd.h>
95#include <assert.h>
96#include <sys/types.h>
97#include <sys/socket.h>
98#include <netinet/in.h>
99#include <fcntl.h>
100#include <signal.h>
101#include <errno.h>
102#include <locale.h>
103#include <syslog.h>
104#include <libintl.h>
105#include <librestart.h>
106#include <pthread.h>
107#include <sys/stat.h>
108#include <time.h>
109#include <limits.h>
110#include <libgen.h>
111#include <tcpd.h>
112#include <libscf.h>
113#include <libuutil.h>
114#include <stddef.h>
115#include <bsm/adt_event.h>
116#include "inetd_impl.h"
117
118/* path to inetd's binary */
119#define	INETD_PATH	"/usr/lib/inet/inetd"
120
/*
 * inetd's default configuration file paths. /etc/inet/inetd.conf is set
 * to be the primary file, so it is checked before /etc/inetd.conf.
 */
125#define	PRIMARY_DEFAULT_CONF_FILE	"/etc/inet/inetd.conf"
126#define	SECONDARY_DEFAULT_CONF_FILE	"/etc/inetd.conf"
127
128/* Arguments passed to this binary to request which method to execute. */
129#define	START_METHOD_ARG	"start"
130#define	STOP_METHOD_ARG		"stop"
131#define	REFRESH_METHOD_ARG	"refresh"
132
133/* connection backlog for unix domain socket */
134#define	UDS_BACKLOG	2
135
136/* number of retries to recv() a request on the UDS socket before giving up */
137#define	UDS_RECV_RETRIES	10
138
/*
 * Names the two ends of a pipe. The values are used as indices into the
 * two element fd array that describes a pipe.
 */
enum pipe_end {
	PE_CONSUMER = 0,
	PE_PRODUCER = 1
};
144
145typedef struct {
146	internal_inst_state_t		istate;
147	const char			*name;
148	restarter_instance_state_t	smf_state;
149	instance_method_t		method_running;
150} state_info_t;
151
152
153/*
154 * Collection of information for each state.
155 * NOTE:  This table is indexed into using the internal_inst_state_t
156 * enumeration, so the ordering needs to be kept in synch.
157 */
158static state_info_t states[] = {
159	{IIS_UNINITIALIZED, "uninitialized", RESTARTER_STATE_UNINIT,
160	    IM_NONE},
161	{IIS_ONLINE, "online", RESTARTER_STATE_ONLINE, IM_START},
162	{IIS_IN_ONLINE_METHOD, "online_method", RESTARTER_STATE_OFFLINE,
163	    IM_ONLINE},
164	{IIS_OFFLINE, "offline", RESTARTER_STATE_OFFLINE, IM_NONE},
165	{IIS_IN_OFFLINE_METHOD, "offline_method", RESTARTER_STATE_OFFLINE,
166	    IM_OFFLINE},
167	{IIS_DISABLED, "disabled", RESTARTER_STATE_DISABLED, IM_NONE},
168	{IIS_IN_DISABLE_METHOD, "disabled_method", RESTARTER_STATE_OFFLINE,
169	    IM_DISABLE},
170	{IIS_IN_REFRESH_METHOD, "refresh_method", RESTARTER_STATE_ONLINE,
171	    IM_REFRESH},
172	{IIS_MAINTENANCE, "maintenance", RESTARTER_STATE_MAINT, IM_NONE},
173	{IIS_OFFLINE_CONRATE, "cr_offline", RESTARTER_STATE_OFFLINE, IM_NONE},
174	{IIS_OFFLINE_BIND, "bind_offline", RESTARTER_STATE_OFFLINE, IM_NONE},
175	{IIS_OFFLINE_COPIES, "copies_offline", RESTARTER_STATE_OFFLINE,
176	    IM_NONE},
177	{IIS_DEGRADED, "degraded", RESTARTER_STATE_DEGRADED, IM_NONE},
178	{IIS_NONE, "none", RESTARTER_STATE_NONE, IM_NONE}
179};
180
181/*
182 * Pipe used to send events from the threads created by restarter_bind_handle()
183 * to the main thread of control.
184 */
185static int			rst_event_pipe[] = {-1, -1};
186/*
187 * Used to protect the critical section of code in restarter_event_proxy() that
188 * involves writing an event down the event pipe and reading an acknowledgement.
189 */
190static pthread_mutex_t		rst_event_pipe_mtx = PTHREAD_MUTEX_INITIALIZER;
191
192/* handle used in communication with the master restarter */
193static restarter_event_handle_t *rst_event_handle = NULL;
194
195/* set to indicate a refresh of inetd is requested */
196static boolean_t		refresh_inetd_requested = B_FALSE;
197
198/* set by the SIGTERM handler to flag we got a SIGTERM */
199static boolean_t		got_sigterm = B_FALSE;
200
201/*
202 * Timer queue used to store timers for delayed event processing, such as
203 * bind retries.
204 */
205iu_tq_t				*timer_queue = NULL;
206
207/*
208 * fd of Unix Domain socket used to communicate stop and refresh requests
209 * to the inetd start method process.
210 */
211static int			uds_fd = -1;
212
213/*
214 * List of inetd's currently managed instances; each containing its state,
215 * and in certain states its configuration.
216 */
217static uu_list_pool_t		*instance_pool = NULL;
218uu_list_t			*instance_list = NULL;
219
220/* set to indicate we're being stopped */
221boolean_t			inetd_stopping = B_FALSE;
222
223/* TCP wrappers syslog globals. Consumed by libwrap. */
224int				allow_severity = LOG_INFO;
225int				deny_severity = LOG_WARNING;
226
227/* path of the configuration file being monitored by check_conf_file() */
228static char			*conf_file = NULL;
229
230/* Auditing session handle */
231static adt_session_data_t	*audit_handle;
232
233static void uds_fini(void);
234static int uds_init(void);
235static int run_method(instance_t *, instance_method_t, const proto_info_t *);
236static void create_bound_fds(instance_t *);
237static void destroy_bound_fds(instance_t *);
238static void destroy_instance(instance_t *);
239static void inetd_stop(void);
240static void
241exec_method(instance_t *instance, instance_method_t method, method_info_t *mi,
242    struct method_context *mthd_ctxt, const proto_info_t *pi) __NORETURN;
243
244/*
245 * The following two functions are callbacks that libumem uses to determine
246 * inetd's desired debugging/logging levels. The interface they consume is
247 * exported by FMA and is consolidation private. The comments in the two
248 * functions give the environment variable that will effectively be set to
249 * their returned value, and thus whose behavior for this value, described in
250 * umem_debug(3MALLOC), will be followed.
251 */
252
/* Returns the string to be used as the UMEM_DEBUG setting. */
const char *
_umem_debug_init(void)
{
	static const char umem_debug_setting[] = "default,verbose";

	return (umem_debug_setting);
}
258
/* Returns the string to be used as the UMEM_LOGGING setting. */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_setting[] = "fail,contents";

	return (umem_logging_setting);
}
264
/*
 * Outputs an error message stating that the instance named by 'fmri' has an
 * invalid configuration and is being placed in maintenance.
 */
static void
log_invalid_cfg(const char *fmri)
{
	const char *fmt = gettext(
	    "Invalid configuration for instance %s, placing in maintenance");

	error_msg(fmt, fmri);
}
272
273/*
274 * Returns B_TRUE if the instance is in a suitable state for inetd to stop.
275 */
276static boolean_t
277instance_stopped(const instance_t *inst)
278{
279	return ((inst->cur_istate == IIS_OFFLINE) ||
280	    (inst->cur_istate == IIS_MAINTENANCE) ||
281	    (inst->cur_istate == IIS_DISABLED) ||
282	    (inst->cur_istate == IIS_UNINITIALIZED));
283}
284
285/*
286 * Updates the current and next repository states of instance 'inst'. If
287 * any errors occur an error message is output.
288 */
289static void
290update_instance_states(instance_t *inst, internal_inst_state_t new_cur_state,
291    internal_inst_state_t new_next_state, restarter_error_t err)
292{
293	internal_inst_state_t	old_cur = inst->cur_istate;
294	internal_inst_state_t	old_next = inst->next_istate;
295	scf_error_t		sret;
296	int			ret;
297
298	debug_msg("Entering update_instance_states: oldcur: %s, newcur: %s "
299	    "oldnext: %s, newnext: %s", states[old_cur].name,
300	    states[new_cur_state].name, states[old_next].name,
301	    states[new_next_state].name);
302
303
304	/* update the repository/cached internal state */
305	inst->cur_istate = new_cur_state;
306	inst->next_istate = new_next_state;
307	(void) set_single_rep_val(inst->cur_istate_rep,
308	    (int64_t)new_cur_state);
309	(void) set_single_rep_val(inst->next_istate_rep,
310	    (int64_t)new_next_state);
311
312	if (((sret = store_rep_vals(inst->cur_istate_rep, inst->fmri,
313	    PR_NAME_CUR_INT_STATE)) != 0) ||
314	    ((sret = store_rep_vals(inst->next_istate_rep, inst->fmri,
315	    PR_NAME_NEXT_INT_STATE)) != 0))
316		error_msg(gettext("Failed to update state of instance %s in "
317		    "repository: %s"), inst->fmri, scf_strerror(sret));
318
319	/* update the repository SMF state */
320	if ((ret = restarter_set_states(rst_event_handle, inst->fmri,
321	    states[old_cur].smf_state, states[new_cur_state].smf_state,
322	    states[old_next].smf_state, states[new_next_state].smf_state,
323	    err, 0)) != 0)
324		error_msg(gettext("Failed to update state of instance %s in "
325		    "repository: %s"), inst->fmri, strerror(ret));
326
327}
328
329void
330update_state(instance_t *inst, internal_inst_state_t new_cur,
331    restarter_error_t err)
332{
333	update_instance_states(inst, new_cur, IIS_NONE, err);
334}
335
336/*
337 * Sends a refresh event to the inetd start method process and returns
338 * SMF_EXIT_OK if it managed to send it. If it fails to send the request for
339 * some reason it returns SMF_EXIT_ERR_OTHER.
340 */
341static int
342refresh_method(void)
343{
344	uds_request_t   req = UR_REFRESH_INETD;
345	int		fd;
346
347	debug_msg("Entering refresh_method");
348
349	if ((fd = connect_to_inetd()) < 0) {
350		error_msg(gettext("Failed to connect to inetd: %s"),
351		    strerror(errno));
352		return (SMF_EXIT_ERR_OTHER);
353	}
354
355	/* write the request and return success */
356	if (safe_write(fd, &req, sizeof (req)) == -1) {
357		error_msg(
358		    gettext("Failed to send refresh request to inetd: %s"),
359		    strerror(errno));
360		(void) close(fd);
361		return (SMF_EXIT_ERR_OTHER);
362	}
363
364	(void) close(fd);
365
366	return (SMF_EXIT_OK);
367}
368
369/*
370 * Sends a stop event to the inetd start method process and wait till it goes
371 * away. If inetd is determined to have stopped SMF_EXIT_OK is returned, else
372 * SMF_EXIT_ERR_OTHER is returned.
373 */
374static int
375stop_method(void)
376{
377	uds_request_t   req = UR_STOP_INETD;
378	int		fd;
379	char		c;
380	ssize_t		ret;
381
382	debug_msg("Entering stop_method");
383
384	if ((fd = connect_to_inetd()) == -1) {
385		debug_msg(gettext("Failed to connect to inetd: %s"),
386		    strerror(errno));
387		/*
388		 * Assume connect_to_inetd() failed because inetd was already
389		 * stopped, and return success.
390		 */
391		return (SMF_EXIT_OK);
392	}
393
394	/*
395	 * This is safe to do since we're fired off in a separate process
396	 * than inetd and in the case we get wedged, the stop method timeout
397	 * will occur and we'd be killed by our restarter.
398	 */
399	enable_blocking(fd);
400
401	/* write the stop request to inetd and wait till it goes away */
402	if (safe_write(fd, &req, sizeof (req)) != 0) {
403		error_msg(gettext("Failed to send stop request to inetd"));
404		(void) close(fd);
405		return (SMF_EXIT_ERR_OTHER);
406	}
407
408	/* wait until remote end of socket is closed */
409	while (((ret = recv(fd, &c, sizeof (c), 0)) != 0) && (errno == EINTR))
410		;
411
412	(void) close(fd);
413
414	if (ret != 0) {
415		error_msg(gettext("Failed to determine whether inetd stopped"));
416		return (SMF_EXIT_ERR_OTHER);
417	}
418
419	return (SMF_EXIT_OK);
420}
421
422
423/*
424 * This function is called to handle restarter events coming in from the
425 * master restarter. It is registered with the master restarter via
426 * restarter_bind_handle() and simply passes a pointer to the event down
427 * the event pipe, which will be discovered by the poll in the event loop
428 * and processed there. It waits for an acknowledgement to be written back down
429 * the pipe before returning.
430 * Writing a pointer to the function's 'event' parameter down the pipe will
431 * be safe, as the thread in restarter_event_proxy() doesn't return until
432 * the main thread has finished its processing of the passed event, thus
433 * the referenced event will remain around until the function returns.
434 * To impose the limit of only one event being in the pipe and processed
435 * at once, a lock is taken on entry to this function and returned on exit.
436 * Always returns 0.
437 */
438static int
439restarter_event_proxy(restarter_event_t *event)
440{
441	restarter_event_type_t  ev_type;
442	boolean_t		processed;
443
444	debug_msg("Entering restarter_event_proxy");
445	ev_type = restarter_event_get_type(event);
446	debug_msg("event: %x, event type: %d", event, ev_type);
447
448	(void) pthread_mutex_lock(&rst_event_pipe_mtx);
449
450	/* write the event to the main worker thread down the pipe */
451	if (safe_write(rst_event_pipe[PE_PRODUCER], &event,
452	    sizeof (event)) != 0)
453		goto pipe_error;
454
455	/*
456	 * Wait for an acknowledgement that the event has been processed from
457	 * the same pipe. In the case that inetd is stopping, any thread in
458	 * this function will simply block on this read until inetd eventually
459	 * exits. This will result in this function not returning success to
460	 * its caller, and the event that was being processed when the
461	 * function exited will be re-sent when inetd is next started.
462	 */
463	if (safe_read(rst_event_pipe[PE_PRODUCER], &processed,
464	    sizeof (processed)) != 0)
465		goto pipe_error;
466
467	(void) pthread_mutex_unlock(&rst_event_pipe_mtx);
468
469	return (processed ? 0 : EAGAIN);
470
471pipe_error:
472	/*
473	 * Something's seriously wrong with the event pipe. Notify the
474	 * worker thread by closing this end of the event pipe and pause till
475	 * inetd exits.
476	 */
477	error_msg(gettext("Can't process restarter events: %s"),
478	    strerror(errno));
479	(void) close(rst_event_pipe[PE_PRODUCER]);
480	for (;;)
481		(void) pause();
482
483	/* NOTREACHED */
484}
485
486/*
487 * Let restarter_event_proxy() know we're finished with the event it's blocked
488 * upon. The 'processed' argument denotes whether we successfully processed the
489 * event.
490 */
491static void
492ack_restarter_event(boolean_t processed)
493{
494	debug_msg("Entering ack_restarter_event");
495
496	/*
497	 * If safe_write returns -1 something's seriously wrong with the event
498	 * pipe, so start the shutdown proceedings.
499	 */
500	if (safe_write(rst_event_pipe[PE_CONSUMER], &processed,
501	    sizeof (processed)) == -1)
502		inetd_stop();
503}
504
/*
 * Makes 'ident' the syslog identification string by closing the log and
 * reopening it under the new identity.
 */
static void
change_syslog_ident(const char *ident)
{
	const int log_options = LOG_PID | LOG_CONS;

	debug_msg("Entering change_syslog_ident: ident: %s", ident);

	closelog();
	openlog(ident, log_options, LOG_DAEMON);
}
516
517/*
518 * Perform TCP wrappers checks on this instance. Due to the fact that the
519 * current wrappers code used in Solaris is taken untouched from the open
520 * source version, we're stuck with using the daemon name for the checks, as
521 * opposed to making use of instance FMRIs. Sigh.
522 * Returns B_TRUE if the check passed, else B_FALSE.
523 */
524static boolean_t
525tcp_wrappers_ok(instance_t *instance)
526{
527	boolean_t		rval = B_TRUE;
528	char			*daemon_name;
529	basic_cfg_t		*cfg = instance->config->basic;
530	struct request_info	req;
531
532	debug_msg("Entering tcp_wrappers_ok, instance: %s", instance->fmri);
533
534	/*
535	 * Wrap the service using libwrap functions. The code below implements
536	 * the functionality of tcpd. This is done only for stream,nowait
537	 * services, following the convention of other vendors.  udp/dgram and
538	 * stream/wait can NOT be wrapped with this libwrap, so be wary of
539	 * changing the test below.
540	 */
541	if (cfg->do_tcp_wrappers && !cfg->iswait && !cfg->istlx) {
542
543		daemon_name = instance->config->methods[
544		    IM_START]->exec_args_we.we_wordv[0];
545		if (*daemon_name == '/')
546			daemon_name = strrchr(daemon_name, '/') + 1;
547
548		/*
549		 * Change the syslog message identity to the name of the
550		 * daemon being wrapped, as opposed to "inetd".
551		 */
552		change_syslog_ident(daemon_name);
553
554		(void) request_init(&req, RQ_DAEMON, daemon_name, RQ_FILE,
555		    instance->conn_fd, NULL);
556		fromhost(&req);
557
558		if (strcasecmp(eval_hostname(req.client), paranoid) == 0) {
559			syslog(deny_severity,
560			    "refused connect from %s (name/address mismatch)",
561			    eval_client(&req));
562			if (req.sink != NULL)
563				req.sink(instance->conn_fd);
564			rval = B_FALSE;
565		} else if (!hosts_access(&req)) {
566			syslog(deny_severity,
567			    "refused connect from %s (access denied)",
568			    eval_client(&req));
569			if (req.sink != NULL)
570				req.sink(instance->conn_fd);
571			rval = B_FALSE;
572		} else {
573			syslog(allow_severity, "connect from %s",
574			    eval_client(&req));
575		}
576
577		/* Revert syslog identity back to "inetd". */
578		change_syslog_ident(SYSLOG_IDENT);
579	}
580	return (rval);
581}
582
583/*
584 * Handler registered with the timer queue code to remove an instance from
585 * the connection rate offline state when it has been there for its allotted
586 * time.
587 */
588/* ARGSUSED */
589static void
590conn_rate_online(iu_tq_t *tq, void *arg)
591{
592	instance_t *instance = arg;
593
594	debug_msg("Entering conn_rate_online, instance: %s",
595	    instance->fmri);
596
597	assert(instance->cur_istate == IIS_OFFLINE_CONRATE);
598	instance->timer_id = -1;
599	update_state(instance, IIS_OFFLINE, RERR_RESTART);
600	process_offline_inst(instance);
601}
602
603/*
604 * Check whether this instance in the offline state is in transition to
605 * another state and do the work to continue this transition.
606 */
607void
608process_offline_inst(instance_t *inst)
609{
610	debug_msg("Entering process_offline_inst");
611
612	if (inst->disable_req) {
613		inst->disable_req = B_FALSE;
614		(void) run_method(inst, IM_DISABLE, NULL);
615	} else if (inst->maintenance_req) {
616		inst->maintenance_req = B_FALSE;
617		update_state(inst, IIS_MAINTENANCE, RERR_RESTART);
618	/*
619	 * If inetd is in the process of stopping, we don't want to enter
620	 * any states but offline, disabled and maintenance.
621	 */
622	} else if (!inetd_stopping) {
623		if (inst->conn_rate_exceeded) {
624			basic_cfg_t *cfg = inst->config->basic;
625
626			inst->conn_rate_exceeded = B_FALSE;
627			update_state(inst, IIS_OFFLINE_CONRATE, RERR_RESTART);
628			/*
629			 * Schedule a timer to bring the instance out of the
630			 * connection rate offline state.
631			 */
632			inst->timer_id = iu_schedule_timer(timer_queue,
633			    cfg->conn_rate_offline, conn_rate_online,
634			    inst);
635			if (inst->timer_id == -1) {
636				error_msg(gettext("%s unable to set timer, "
637				    "won't be brought on line after %d "
638				    "seconds."), inst->fmri,
639				    cfg->conn_rate_offline);
640			}
641
642		} else if (copies_limit_exceeded(inst)) {
643			update_state(inst, IIS_OFFLINE_COPIES, RERR_RESTART);
644		}
645	}
646}
647
648/*
649 * Create a socket bound to the instance's configured address. If the
650 * bind fails, returns -1, else the fd of the bound socket.
651 */
652static int
653create_bound_socket(const char *fmri, socket_info_t *sock_info)
654{
655	int		fd;
656	int		on = 1;
657	rpc_info_t	*rpc = sock_info->pr_info.ri;
658	const char	*proto = sock_info->pr_info.proto;
659
660	debug_msg("Entering create_bound_socket");
661
662	fd = socket(sock_info->local_addr.ss_family, sock_info->type,
663	    sock_info->protocol);
664	if (fd < 0) {
665		error_msg(gettext(
666		    "Socket creation failure for instance %s, proto %s: %s"),
667		    fmri, proto, strerror(errno));
668		return (-1);
669	}
670
671	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on)) == -1) {
672		error_msg(gettext("setsockopt SO_REUSEADDR failed for service "
673		    "instance %s, proto %s: %s"), fmri, proto, strerror(errno));
674		(void) close(fd);
675		return (-1);
676	}
677	if (sock_info->pr_info.v6only) {
678		/* restrict socket to IPv6 communications only */
679		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on,
680		    sizeof (on)) == -1) {
681			error_msg(gettext("setsockopt IPV6_V6ONLY failed for "
682			    "service instance %s, proto %s: %s"), fmri, proto,
683			    strerror(errno));
684			(void) close(fd);
685			return (-1);
686		}
687	}
688
689	if (rpc != NULL)
690		SS_SETPORT(sock_info->local_addr, 0);
691
692	if (bind(fd, (struct sockaddr *)&(sock_info->local_addr),
693	    SS_ADDRLEN(sock_info->local_addr)) < 0) {
694		error_msg(gettext(
695		    "Failed to bind to the port of service instance %s, "
696		    "proto %s: %s"), fmri, proto, strerror(errno));
697		(void) close(fd);
698		return (-1);
699	}
700
701	/*
702	 * Retrieve and store the address bound to for RPC services.
703	 */
704	if (rpc != NULL) {
705		struct sockaddr_storage	ss;
706		int			ss_size = sizeof (ss);
707
708		if (getsockname(fd, (struct sockaddr *)&ss, &ss_size) < 0) {
709			error_msg(gettext("Failed getsockname for instance %s, "
710			    "proto %s: %s"), fmri, proto, strerror(errno));
711			(void) close(fd);
712			return (-1);
713		}
714		(void) memcpy(rpc->netbuf.buf, &ss,
715		    sizeof (struct sockaddr_storage));
716		rpc->netbuf.len = SS_ADDRLEN(ss);
717		rpc->netbuf.maxlen = SS_ADDRLEN(ss);
718	}
719
720	if (sock_info->type == SOCK_STREAM)
721		(void) listen(fd, CONNECTION_BACKLOG);
722
723	return (fd);
724}
725
726/*
727 * Handler registered with the timer queue code to retry the creation
728 * of a bound fd.
729 */
730/* ARGSUSED */
731static void
732retry_bind(iu_tq_t *tq, void *arg)
733{
734	instance_t *instance = arg;
735
736	debug_msg("Entering retry_bind, instance: %s", instance->fmri);
737
738	switch (instance->cur_istate) {
739	case IIS_OFFLINE_BIND:
740	case IIS_ONLINE:
741	case IIS_DEGRADED:
742	case IIS_IN_ONLINE_METHOD:
743	case IIS_IN_REFRESH_METHOD:
744		break;
745	default:
746#ifndef NDEBUG
747		(void) fprintf(stderr, "%s:%d: Unknown instance state %d.\n",
748		    __FILE__, __LINE__, instance->cur_istate);
749#endif
750		abort();
751	}
752
753	instance->bind_timer_id = -1;
754	create_bound_fds(instance);
755}
756
757/*
758 * For each of the fds for the given instance that are bound, if 'listen' is
759 * set add them to the poll set, else remove them from it. If any additions
760 * fail, returns -1, else 0 on success.
761 */
762int
763poll_bound_fds(instance_t *instance, boolean_t listen)
764{
765	basic_cfg_t	*cfg = instance->config->basic;
766	proto_info_t	*pi;
767	int		ret = 0;
768
769	debug_msg("Entering poll_bound_fds: instance: %s, on: %d",
770	    instance->fmri, listen);
771
772	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
773	    pi = uu_list_next(cfg->proto_list, pi)) {
774		if (pi->listen_fd != -1) {	/* fd bound */
775			if (!listen) {
776				clear_pollfd(pi->listen_fd);
777			} else if (set_pollfd(pi->listen_fd, POLLIN) == -1) {
778				ret = -1;
779			}
780		}
781	}
782
783	return (ret);
784}
785
786/*
787 * Handle the case were we either fail to create a bound fd or we fail
788 * to add a bound fd to the poll set for the given instance.
789 */
790static void
791handle_bind_failure(instance_t *instance)
792{
793	basic_cfg_t *cfg = instance->config->basic;
794
795	debug_msg("Entering handle_bind_failure: instance: %s", instance);
796
797	/*
798	 * We must be being called as a result of a failed poll_bound_fds()
799	 * as a bind retry is already scheduled. Just return and let it do
800	 * the work.
801	 */
802	if (instance->bind_timer_id != -1)
803		return;
804
805	/*
806	 * Check if the rebind retries limit is operative and if so,
807	 * if it has been reached.
808	 */
809	if (((cfg->bind_fail_interval <= 0) ||		/* no retries */
810	    ((cfg->bind_fail_max >= 0) &&		/* limit reached */
811	    (++instance->bind_fail_count > cfg->bind_fail_max))) ||
812	    ((instance->bind_timer_id = iu_schedule_timer(timer_queue,
813	    cfg->bind_fail_interval, retry_bind, instance)) == -1)) {
814		proto_info_t *pi;
815
816		instance->bind_fail_count = 0;
817
818		switch (instance->cur_istate) {
819		case IIS_DEGRADED:
820		case IIS_ONLINE:
821			/* check if any of the fds are being poll'd upon */
822			for (pi = uu_list_first(cfg->proto_list); pi != NULL;
823			    pi = uu_list_next(cfg->proto_list, pi)) {
824				if ((pi->listen_fd != -1) &&
825				    (find_pollfd(pi->listen_fd) != NULL))
826					break;
827			}
828			if (pi != NULL)	{	/* polling on > 0 fds */
829				warn_msg(gettext("Failed to bind on "
830				    "all protocols for instance %s, "
831				    "transitioning to degraded"),
832				    instance->fmri);
833				update_state(instance, IIS_DEGRADED, RERR_NONE);
834				instance->bind_retries_exceeded = B_TRUE;
835				break;
836			}
837
838			destroy_bound_fds(instance);
839			/*
840			 * In the case we failed the 'bind' because set_pollfd()
841			 * failed on all bound fds, use the offline handling.
842			 */
843			/* FALLTHROUGH */
844		case IIS_OFFLINE:
845		case IIS_OFFLINE_BIND:
846			error_msg(gettext("Too many bind failures for instance "
847			"%s, transitioning to maintenance"), instance->fmri);
848			update_state(instance, IIS_MAINTENANCE,
849			    RERR_FAULT);
850			break;
851		case IIS_IN_ONLINE_METHOD:
852		case IIS_IN_REFRESH_METHOD:
853			warn_msg(gettext("Failed to bind on all "
854			    "protocols for instance %s, instance will go to "
855			    "degraded"), instance->fmri);
856			/*
857			 * Set the retries exceeded flag so when the method
858			 * completes the instance goes to the degraded state.
859			 */
860			instance->bind_retries_exceeded = B_TRUE;
861			break;
862		default:
863#ifndef NDEBUG
864			(void) fprintf(stderr,
865			    "%s:%d: Unknown instance state %d.\n",
866			    __FILE__, __LINE__, instance->cur_istate);
867#endif
868			abort();
869		}
870	} else if (instance->cur_istate == IIS_OFFLINE) {
871		/*
872		 * bind re-scheduled, so if we're offline reflect this in the
873		 * state.
874		 */
875		update_state(instance, IIS_OFFLINE_BIND, RERR_NONE);
876	}
877}
878
879
880/*
881 * Check if two transport protocols for RPC conflict.
882 */
883
884boolean_t
885is_rpc_proto_conflict(const char *proto0, const char *proto1) {
886	if (strcmp(proto0, "tcp") == 0) {
887		if (strcmp(proto1, "tcp") == 0)
888			return (B_TRUE);
889		if (strcmp(proto1, "tcp6") == 0)
890			return (B_TRUE);
891		return (B_FALSE);
892	}
893
894	if (strcmp(proto0, "tcp6") == 0) {
895		if (strcmp(proto1, "tcp") == 0)
896			return (B_TRUE);
897		if (strcmp(proto1, "tcp6only") == 0)
898			return (B_TRUE);
899		if (strcmp(proto1, "tcp6") == 0)
900			return (B_TRUE);
901		return (B_FALSE);
902	}
903
904	if (strcmp(proto0, "tcp6only") == 0) {
905		if (strcmp(proto1, "tcp6only") == 0)
906			return (B_TRUE);
907		if (strcmp(proto1, "tcp6") == 0)
908			return (B_TRUE);
909		return (B_FALSE);
910	}
911
912	if (strcmp(proto0, "udp") == 0) {
913		if (strcmp(proto1, "udp") == 0)
914			return (B_TRUE);
915		if (strcmp(proto1, "udp6") == 0)
916			return (B_TRUE);
917		return (B_FALSE);
918	}
919
920	if (strcmp(proto0, "udp6") == 0) {
921
922		if (strcmp(proto1, "udp") == 0)
923			return (B_TRUE);
924		if (strcmp(proto1, "udp6only") == 0)
925			return (B_TRUE);
926		if (strcmp(proto1, "udp6") == 0)
927			return (B_TRUE);
928		return (B_FALSE);
929	}
930
931	if (strcmp(proto0, "udp6only") == 0) {
932
933		if (strcmp(proto1, "udp6only") == 0)
934			return (B_TRUE);
935		if (strcmp(proto1, "udp6") == 0)
936			return (B_TRUE);
937		return (0);
938	}
939
940	/*
941	 * If the protocol isn't TCP/IP or UDP/IP assume that it has its own
942	 * port namepsace and that conflicts can be detected by literal string
943	 * comparison.
944	 */
945
946	if (strcmp(proto0, proto1))
947		return (FALSE);
948
949	return (B_TRUE);
950}
951
952
953/*
954 * Check if inetd thinks this RPC program number is already registered.
955 *
956 * An RPC protocol conflict occurs if
957 * 	a) the program numbers are the same and,
958 * 	b) the version numbers overlap,
959 * 	c) the protocols (TCP vs UDP vs tic*) are the same.
960 */
961
962boolean_t
963is_rpc_num_in_use(int rpc_n, char *proto, int lowver, int highver) {
964	instance_t *i;
965	basic_cfg_t *cfg;
966	proto_info_t *pi;
967
968	for (i = uu_list_first(instance_list); i != NULL;
969	    i = uu_list_next(instance_list, i)) {
970
971		if (i->cur_istate != IIS_ONLINE)
972			continue;
973		cfg = i->config->basic;
974
975		for (pi = uu_list_first(cfg->proto_list); pi != NULL;
976		    pi = uu_list_next(cfg->proto_list, pi)) {
977
978			if (pi->ri == NULL)
979				continue;
980			if (pi->ri->prognum != rpc_n)
981				continue;
982			if (!is_rpc_proto_conflict(pi->proto, proto))
983				continue;
984			if ((lowver < pi->ri->lowver &&
985			    highver < pi->ri->lowver) ||
986			    (lowver > pi->ri->highver &&
987			    highver > pi->ri->highver))
988				continue;
989			return (B_TRUE);
990		}
991	}
992	return (B_FALSE);
993}
994
995
996/*
997 * Independent of the transport, for each of the entries in the instance's
998 * proto list this function first attempts to create an associated network fd;
999 * for RPC services these are then bound to a kernel chosen port and the
1000 * fd is registered with rpcbind; for non-RPC services the fds are bound
1001 * to the port associated with the instance's service name. On any successful
1002 * binds the instance is taken online. Failed binds are handled by
1003 * handle_bind_failure().
1004 */
1005void
1006create_bound_fds(instance_t *instance)
1007{
1008	basic_cfg_t	*cfg = instance->config->basic;
1009	boolean_t	failure = B_FALSE;
1010	boolean_t	success = B_FALSE;
1011	proto_info_t	*pi;
1012
1013	debug_msg("Entering create_bound_fd: instance: %s", instance->fmri);
1014
1015	/*
1016	 * Loop through and try and bind any unbound protos.
1017	 */
1018	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
1019	    pi = uu_list_next(cfg->proto_list, pi)) {
1020		if (pi->listen_fd != -1)
1021			continue;
1022		if (cfg->istlx) {
1023			pi->listen_fd = create_bound_endpoint(instance->fmri,
1024			    (tlx_info_t *)pi);
1025		} else {
1026			/*
1027			 * We cast pi to a void so we can then go on to cast
1028			 * it to a socket_info_t without lint complaining
1029			 * about alignment. This is done because the x86
1030			 * version of lint thinks a lint suppression directive
1031			 * is unnecessary and flags it as such, yet the sparc
1032			 * version complains if it's absent.
1033			 */
1034			void *p = pi;
1035			pi->listen_fd = create_bound_socket(instance->fmri,
1036			    (socket_info_t *)p);
1037		}
1038		if (pi->listen_fd == -1) {
1039			failure = B_TRUE;
1040			continue;
1041		}
1042
1043		if (pi->ri != NULL) {
1044
1045			/*
1046			 * Don't register the same RPC program number twice.
1047			 * Doing so silently discards the old service
1048			 * without causing an error.
1049			 */
1050			if (is_rpc_num_in_use(pi->ri->prognum, pi->proto,
1051				pi->ri->lowver, pi->ri->highver)) {
1052				failure = B_TRUE;
1053				close_net_fd(instance, pi->listen_fd);
1054				pi->listen_fd = -1;
1055				continue;
1056			}
1057
1058			unregister_rpc_service(instance->fmri, pi->ri);
1059			if (register_rpc_service(instance->fmri, pi->ri) ==
1060			    -1) {
1061				close_net_fd(instance, pi->listen_fd);
1062				pi->listen_fd = -1;
1063				failure = B_TRUE;
1064				continue;
1065			}
1066		}
1067
1068		success = B_TRUE;
1069	}
1070
1071	switch (instance->cur_istate) {
1072	case IIS_OFFLINE:
1073	case IIS_OFFLINE_BIND:
1074		/*
1075		 * If we've managed to bind at least one proto lets run the
1076		 * online method, so we can start listening for it.
1077		 */
1078		if (success && run_method(instance, IM_ONLINE, NULL) == -1)
1079			return;	/* instance gone to maintenance */
1080		break;
1081	case IIS_ONLINE:
1082	case IIS_IN_REFRESH_METHOD:
1083		/*
1084		 * We're 'online', so start polling on any bound fds we're
1085		 * currently not.
1086		 */
1087		if (poll_bound_fds(instance, B_TRUE) != 0) {
1088			failure = B_TRUE;
1089		} else if (!failure) {
1090			/*
1091			 * We've successfully bound and poll'd upon all protos,
1092			 * so reset the failure count.
1093			 */
1094			instance->bind_fail_count = 0;
1095		}
1096		break;
1097	case IIS_IN_ONLINE_METHOD:
1098		/*
1099		 * Nothing to do here as the method completion code will start
1100		 * listening for any successfully bound fds.
1101		 */
1102		break;
1103	default:
1104#ifndef NDEBUG
1105		(void) fprintf(stderr, "%s:%d: Unknown instance state %d.\n",
1106		    __FILE__, __LINE__, instance->cur_istate);
1107#endif
1108		abort();
1109	}
1110
1111	if (failure)
1112		handle_bind_failure(instance);
1113}
1114
1115/*
1116 * Counter to create_bound_fds(), for each of the bound network fds this
1117 * function unregisters the instance from rpcbind if it's an RPC service,
1118 * stops listening for new connections for it and then closes the listening fd.
1119 */
1120static void
1121destroy_bound_fds(instance_t *instance)
1122{
1123	basic_cfg_t	*cfg = instance->config->basic;
1124	proto_info_t	*pi;
1125
1126	debug_msg("Entering destroy_bound_fds: instance: %s", instance->fmri);
1127
1128	for (pi = uu_list_first(cfg->proto_list); pi != NULL;
1129	    pi = uu_list_next(cfg->proto_list, pi)) {
1130		if (pi->listen_fd != -1) {
1131			if (pi->ri != NULL)
1132				unregister_rpc_service(instance->fmri, pi->ri);
1133			clear_pollfd(pi->listen_fd);
1134			close_net_fd(instance, pi->listen_fd);
1135			pi->listen_fd = -1;
1136		}
1137	}
1138
1139	/* cancel any bind retries */
1140	if (instance->bind_timer_id != -1)
1141		cancel_bind_timer(instance);
1142
1143	instance->bind_retries_exceeded = B_FALSE;
1144}
1145
1146/*
1147 * Perform %A address expansion and return a pointer to a static string
1148 * array containing crafted arguments. This expansion is provided for
1149 * compatibility with 4.2BSD daemons, and as such we've copied the logic of
1150 * the legacy inetd to maintain this compatibility as much as possible. This
1151 * logic is a bit scatty, but it dates back at least as far as SunOS 4.x.
1152 */
1153static char **
1154expand_address(instance_t *inst, const proto_info_t *pi)
1155{
1156	static char	addrbuf[sizeof ("ffffffff.65536")];
1157	static char	*ret[3];
1158	instance_cfg_t	*cfg = inst->config;
1159	/*
1160	 * We cast pi to a void so we can then go on to cast it to a
1161	 * socket_info_t without lint complaining about alignment. This
1162	 * is done because the x86 version of lint thinks a lint suppression
1163	 * directive is unnecessary and flags it as such, yet the sparc
1164	 * version complains if it's absent.
1165	 */
1166	const void	*p = pi;
1167
1168	debug_msg("Entering expand_address");
1169
1170	/* set ret[0] to the basename of exec path */
1171	if ((ret[0] = strrchr(cfg->methods[IM_START]->exec_path, '/'))
1172	    != NULL) {
1173		ret[0]++;
1174	} else {
1175		ret[0] = cfg->methods[IM_START]->exec_path;
1176	}
1177
1178	if (!cfg->basic->istlx &&
1179	    (((socket_info_t *)p)->type == SOCK_DGRAM)) {
1180		ret[1] = NULL;
1181	} else {
1182		addrbuf[0] = '\0';
1183		if (!cfg->basic->iswait &&
1184		    (inst->remote_addr.ss_family == AF_INET)) {
1185			struct sockaddr_in *sp;
1186
1187			sp = (struct sockaddr_in *)&(inst->remote_addr);
1188			(void) snprintf(addrbuf, sizeof (addrbuf), "%x.%hu",
1189			    ntohl(sp->sin_addr.s_addr), ntohs(sp->sin_port));
1190		}
1191		ret[1] = addrbuf;
1192		ret[2] = NULL;
1193	}
1194
1195	return (ret);
1196}
1197
1198/*
1199 * Returns the state associated with the supplied method being run for an
1200 * instance.
1201 */
1202static internal_inst_state_t
1203get_method_state(instance_method_t method)
1204{
1205	state_info_t *sip;
1206
1207	for (sip = states; sip->istate != IIS_NONE; sip++) {
1208		if (sip->method_running == method)
1209			break;
1210	}
1211	assert(sip->istate != IIS_NONE);
1212
1213	return (sip->istate);
1214}
1215
1216/*
1217 * Store the method's PID and CID in the repository. If the store fails
1218 * we ignore it and just drive on.
1219 */
1220static void
1221add_method_ids(instance_t *ins, pid_t pid, ctid_t cid, instance_method_t mthd)
1222{
1223	debug_msg("Entering add_method_ids");
1224
1225	if (cid != -1)
1226		(void) add_remove_contract(ins, B_TRUE, cid);
1227
1228	if (mthd == IM_START) {
1229		if (add_rep_val(ins->start_pids, (int64_t)pid) == 0) {
1230			(void) store_rep_vals(ins->start_pids, ins->fmri,
1231			    PR_NAME_START_PIDS);
1232		}
1233	} else {
1234		if (add_rep_val(ins->non_start_pid, (int64_t)pid) == 0) {
1235			(void) store_rep_vals(ins->non_start_pid, ins->fmri,
1236			    PR_NAME_NON_START_PID);
1237		}
1238	}
1239}
1240
1241/*
1242 * Remove the method's PID and CID from the repository. If the removal
1243 * fails we ignore it and drive on.
1244 */
1245void
1246remove_method_ids(instance_t *inst, pid_t pid, ctid_t cid,
1247    instance_method_t mthd)
1248{
1249	debug_msg("Entering remove_method_ids");
1250
1251	if (cid != -1)
1252		(void) add_remove_contract(inst, B_FALSE, cid);
1253
1254	if (mthd == IM_START) {
1255		remove_rep_val(inst->start_pids, (int64_t)pid);
1256		(void) store_rep_vals(inst->start_pids, inst->fmri,
1257		    PR_NAME_START_PIDS);
1258	} else {
1259		remove_rep_val(inst->non_start_pid, (int64_t)pid);
1260		(void) store_rep_vals(inst->non_start_pid, inst->fmri,
1261		    PR_NAME_NON_START_PID);
1262	}
1263}
1264
1265static instance_t *
1266create_instance(const char *fmri)
1267{
1268	instance_t *ret;
1269
1270	debug_msg("Entering create_instance, instance: %s", fmri);
1271
1272	if (((ret = calloc(1, sizeof (instance_t))) == NULL) ||
1273	    ((ret->fmri = strdup(fmri)) == NULL))
1274		goto alloc_fail;
1275
1276	ret->conn_fd = -1;
1277
1278	ret->copies = 0;
1279
1280	ret->conn_rate_count = 0;
1281	ret->fail_rate_count = 0;
1282	ret->bind_fail_count = 0;
1283
1284	if (((ret->non_start_pid = create_rep_val_list()) == NULL) ||
1285	    ((ret->start_pids = create_rep_val_list()) == NULL) ||
1286	    ((ret->start_ctids = create_rep_val_list()) == NULL))
1287		goto alloc_fail;
1288
1289	ret->cur_istate = IIS_NONE;
1290	ret->next_istate = IIS_NONE;
1291
1292	if (((ret->cur_istate_rep = create_rep_val_list()) == NULL) ||
1293	    ((ret->next_istate_rep = create_rep_val_list()) == NULL))
1294		goto alloc_fail;
1295
1296	ret->config = NULL;
1297	ret->new_config = NULL;
1298
1299	ret->timer_id = -1;
1300	ret->bind_timer_id = -1;
1301
1302	ret->disable_req = B_FALSE;
1303	ret->maintenance_req = B_FALSE;
1304	ret->conn_rate_exceeded = B_FALSE;
1305	ret->bind_retries_exceeded = B_FALSE;
1306
1307	ret->pending_rst_event = RESTARTER_EVENT_TYPE_INVALID;
1308
1309	return (ret);
1310
1311alloc_fail:
1312	error_msg(strerror(errno));
1313	destroy_instance(ret);
1314	return (NULL);
1315}
1316
1317static void
1318destroy_instance(instance_t *inst)
1319{
1320	debug_msg("Entering destroy_instance");
1321
1322	if (inst == NULL)
1323		return;
1324
1325	destroy_instance_cfg(inst->config);
1326	destroy_instance_cfg(inst->new_config);
1327
1328	destroy_rep_val_list(inst->cur_istate_rep);
1329	destroy_rep_val_list(inst->next_istate_rep);
1330
1331	destroy_rep_val_list(inst->start_pids);
1332	destroy_rep_val_list(inst->non_start_pid);
1333	destroy_rep_val_list(inst->start_ctids);
1334
1335	free(inst->fmri);
1336
1337	free(inst);
1338}
1339
1340/*
1341 * Retrieves the current and next states internal states. Returns 0 on success,
1342 * else returns one of the following on error:
1343 * SCF_ERROR_NO_MEMORY if memory allocation failed.
1344 * SCF_ERROR_CONNECTION_BROKEN if the connection to the repository was broken.
1345 * SCF_ERROR_TYPE_MISMATCH if the property was of an unexpected type.
1346 * SCF_ERROR_NO_RESOURCES if the server doesn't have adequate resources.
1347 * SCF_ERROR_NO_SERVER if the server isn't running.
1348 */
1349static scf_error_t
1350retrieve_instance_state(instance_t *inst)
1351{
1352	scf_error_t	ret;
1353
1354	debug_msg("Entering retrieve_instance_state: instance: %s",
1355	    inst->fmri);
1356
1357	/* retrieve internal states */
1358	if (((ret = retrieve_rep_vals(inst->cur_istate_rep, inst->fmri,
1359	    PR_NAME_CUR_INT_STATE)) != 0) ||
1360	    ((ret = retrieve_rep_vals(inst->next_istate_rep, inst->fmri,
1361	    PR_NAME_NEXT_INT_STATE)) != 0)) {
1362		if (ret != SCF_ERROR_NOT_FOUND) {
1363			error_msg(gettext(
1364			    "Failed to read state of instance %s: %s"),
1365			    inst->fmri, scf_strerror(scf_error()));
1366			return (ret);
1367		}
1368
1369		debug_msg("instance with no previous int state - "
1370		    "setting state to uninitialized");
1371
1372		if ((set_single_rep_val(inst->cur_istate_rep,
1373		    (int64_t)IIS_UNINITIALIZED) == -1) ||
1374		    (set_single_rep_val(inst->next_istate_rep,
1375		    (int64_t)IIS_NONE) == -1)) {
1376			return (SCF_ERROR_NO_MEMORY);
1377		}
1378	}
1379
1380	/* update convenience states */
1381	inst->cur_istate = get_single_rep_val(inst->cur_istate_rep);
1382	inst->next_istate = get_single_rep_val(inst->next_istate_rep);
1383	debug_msg("previous states: cur: %d, next: %d", inst->cur_istate,
1384	    inst->next_istate);
1385
1386	return (0);
1387}
1388
1389/*
1390 * Retrieve stored process ids and register each of them so we process their
1391 * termination.
1392 */
1393static int
1394retrieve_method_pids(instance_t *inst)
1395{
1396	rep_val_t	*rv;
1397
1398	debug_msg("Entering remove_method_pids");
1399
1400	switch (retrieve_rep_vals(inst->start_pids, inst->fmri,
1401	    PR_NAME_START_PIDS)) {
1402	case 0:
1403		break;
1404	case SCF_ERROR_NOT_FOUND:
1405		return (0);
1406	default:
1407		error_msg(gettext("Failed to retrieve the start pids of "
1408		    "instance %s from repository: %s"), inst->fmri,
1409		    scf_strerror(scf_error()));
1410		return (-1);
1411	}
1412
1413	rv = uu_list_first(inst->start_pids);
1414	while (rv != NULL) {
1415		if (register_method(inst, (pid_t)rv->val, (ctid_t)-1,
1416		    IM_START) == 0) {
1417			inst->copies++;
1418			rv = uu_list_next(inst->start_pids, rv);
1419		} else if (errno == ENOENT) {
1420			pid_t pid = (pid_t)rv->val;
1421
1422			/*
1423			 * The process must have already terminated. Remove
1424			 * it from the list.
1425			 */
1426			rv = uu_list_next(inst->start_pids, rv);
1427			remove_rep_val(inst->start_pids, pid);
1428		} else {
1429			error_msg(gettext("Failed to listen for the completion "
1430			    "of %s method of instance %s"), START_METHOD_NAME,
1431			    inst->fmri);
1432			rv = uu_list_next(inst->start_pids, rv);
1433		}
1434	}
1435
1436	/* synch the repository pid list to remove any terminated pids */
1437	(void) store_rep_vals(inst->start_pids, inst->fmri, PR_NAME_START_PIDS);
1438
1439	return (0);
1440}
1441
1442/*
1443 * Remove the passed instance from inetd control.
1444 */
1445static void
1446remove_instance(instance_t *instance)
1447{
1448	debug_msg("Entering remove_instance");
1449
1450	switch (instance->cur_istate) {
1451	case IIS_ONLINE:
1452	case IIS_DEGRADED:
1453		/* stop listening for network connections */
1454		destroy_bound_fds(instance);
1455		break;
1456	case IIS_OFFLINE_BIND:
1457		cancel_bind_timer(instance);
1458		break;
1459	case IIS_OFFLINE_CONRATE:
1460		cancel_inst_timer(instance);
1461		break;
1462	}
1463
1464	/* stop listening for terminated methods */
1465	unregister_instance_methods(instance);
1466
1467	uu_list_remove(instance_list, instance);
1468	destroy_instance(instance);
1469}
1470
/*
 * Refresh the configuration of instance 'inst'. This method gets called as
 * a result of a refresh event for the instance from the master restarter, so
 * we can rely upon the instance's running snapshot having been updated from
 * its configuration snapshot.
 *
 * What is done depends on the instance's current state:
 * - maintenance, disabled, uninitialized: nothing.
 * - any of the offline states: the configuration is re-read in place; an
 *   unreadable configuration sends the instance to maintenance, otherwise
 *   the copies limit is re-evaluated where relevant.
 * - online, degraded: the new configuration is read and compared with the
 *   old. If the bind configuration or the start/online methods changed, the
 *   instance is taken offline with the new configuration held in
 *   inst->new_config; otherwise the new configuration is swapped in and the
 *   refresh method run (or the instance is taken offline if the copies limit
 *   is now exceeded). An unreadable configuration takes the instance offline
 *   with a maintenance request pending.
 * Any other state is unexpected and trips an assertion.
 */
void
refresh_instance(instance_t *inst)
{
	instance_cfg_t	*cfg;

	debug_msg("Entering refresh_instance: inst: %s", inst->fmri);

	switch (inst->cur_istate) {
	case IIS_MAINTENANCE:
	case IIS_DISABLED:
	case IIS_UNINITIALIZED:
		/*
		 * Ignore any possible changes, we'll re-read the configuration
		 * automatically when we exit these states.
		 */
		break;

	case IIS_OFFLINE_COPIES:
	case IIS_OFFLINE_BIND:
	case IIS_OFFLINE:
	case IIS_OFFLINE_CONRATE:
		/* no fds are held in these states; replace the config */
		destroy_instance_cfg(inst->config);
		if ((inst->config = read_instance_cfg(inst->fmri)) == NULL) {
			log_invalid_cfg(inst->fmri);
			/* cancel whichever timer this state has pending */
			if (inst->cur_istate == IIS_OFFLINE_BIND) {
				cancel_bind_timer(inst);
			} else if (inst->cur_istate == IIS_OFFLINE_CONRATE) {
				cancel_inst_timer(inst);
			}
			update_state(inst, IIS_MAINTENANCE, RERR_FAULT);
		} else {
			switch (inst->cur_istate) {
			case IIS_OFFLINE_BIND:
				if (copies_limit_exceeded(inst)) {
					/* Cancel scheduled bind retries. */
					cancel_bind_timer(inst);

					/*
					 * Take the instance to the copies
					 * offline state, via the offline
					 * state.
					 */
					update_state(inst, IIS_OFFLINE,
					    RERR_RESTART);
					process_offline_inst(inst);
				}
				break;

			case IIS_OFFLINE:
				process_offline_inst(inst);
				break;

			case IIS_OFFLINE_CONRATE:
				/*
				 * Since we're already in a DOS state,
				 * don't bother evaluating the copies
				 * limit. This will be evaluated when
				 * we leave this state in
				 * process_offline_inst().
				 */
				break;

			case IIS_OFFLINE_COPIES:
				/*
				 * Check if the copies limit has been increased
				 * above the current count.
				 */
				if (!copies_limit_exceeded(inst)) {
					update_state(inst, IIS_OFFLINE,
					    RERR_RESTART);
					process_offline_inst(inst);
				}
				break;

			default:
				/* unreachable given the outer case labels */
				assert(0);
			}
		}
		break;

	case IIS_DEGRADED:
	case IIS_ONLINE:
		if ((cfg = read_instance_cfg(inst->fmri)) != NULL) {
			instance_cfg_t *ocfg = inst->config;

			/*
			 * Try to avoid the overhead of taking an instance
			 * offline and back on again. We do this by limiting
			 * this behavior to two eventualities:
			 * - there needs to be a re-bind to listen on behalf
			 *   of the instance with its new configuration. This
			 *   could be because for example its service has been
			 *   associated with a different port, or because the
			 *   v6only protocol option has been newly applied to
			 *   the instance.
			 * - one or both of the start or online methods of the
			 *   instance have changed in the new configuration.
			 *   Without taking the instance offline when the
			 *   start method changed the instance may be running
			 *   with unwanted parameters (or even an unwanted
			 *   binary); and without taking the instance offline
			 *   if its online method was to change, some part of
			 *   its running environment may have changed and would
			 *   not be picked up until the instance next goes
			 *   offline for another reason.
			 */
			if ((!bind_config_equal(ocfg->basic, cfg->basic)) ||
			    !method_info_equal(ocfg->methods[IM_ONLINE],
			    cfg->methods[IM_ONLINE]) ||
			    !method_info_equal(ocfg->methods[IM_START],
			    cfg->methods[IM_START])) {
				destroy_bound_fds(inst);

				/*
				 * Park the new configuration in new_config
				 * while the offline method runs.
				 */
				assert(inst->new_config == NULL);
				inst->new_config = cfg;

				(void) run_method(inst, IM_OFFLINE, NULL);
			} else {	/* no bind config / method changes */

				/*
				 * swap the proto list over from the old
				 * configuration to the new, so we retain
				 * our set of network fds.
				 */
				destroy_proto_list(cfg->basic);
				cfg->basic->proto_list =
				    ocfg->basic->proto_list;
				/* detach it so the old cfg doesn't free it */
				ocfg->basic->proto_list = NULL;
				destroy_instance_cfg(ocfg);
				inst->config = cfg;

				/* re-evaluate copies limits based on new cfg */
				if (copies_limit_exceeded(inst)) {
					destroy_bound_fds(inst);
					(void) run_method(inst, IM_OFFLINE,
					    NULL);
				} else {
					/*
					 * Since the instance isn't being
					 * taken offline, where we assume it
					 * would pick-up any configuration
					 * changes automatically when it goes
					 * back online, run its refresh method
					 * to allow it to pick-up any changes
					 * whilst still online.
					 */
					(void) run_method(inst, IM_REFRESH,
					    NULL);
				}
			}
		} else {
			log_invalid_cfg(inst->fmri);

			destroy_bound_fds(inst);

			/*
			 * Flag that the instance is to go on to maintenance
			 * once it has been taken offline.
			 */
			inst->maintenance_req = B_TRUE;
			(void) run_method(inst, IM_OFFLINE, NULL);
		}
		break;

	default:
		debug_msg("Unhandled current state %d for instance in "
		    "refresh_instance", inst->cur_istate);
		assert(0);
	}
}
1643
/*
 * Called by process_restarter_event() to handle a restarter event for an
 * instance.
 *
 * Events whose handling doesn't depend (or depends only trivially) on the
 * instance's state - refresh, remove, restart, and stop while in one of the
 * transient offline states - are dealt with first. Everything else is
 * dispatched on the instance's current state and then on the event; events
 * that aren't listed for a state are silently ignored.
 *
 * If 'send_ack' is set the event is acknowledged with ack_restarter_event()
 * once it has been handled. Note that after a remove event the instance has
 * been freed, so nothing past that point may touch it.
 */
static void
handle_restarter_event(instance_t *instance, restarter_event_type_t event,
    boolean_t send_ack)
{
	debug_msg("Entering handle_restarter_event: inst: %s, event: %d, "
	    "curr state: %d", instance->fmri, event, instance->cur_istate);

	/* first, the events handled (largely) independently of state */
	switch (event) {
	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
		refresh_instance(instance);
		goto done;
	case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
		/* 'instance' is freed by this call */
		remove_instance(instance);
		goto done;
	case RESTARTER_EVENT_TYPE_STOP:
		switch (instance->cur_istate) {
		case IIS_OFFLINE_CONRATE:
		case IIS_OFFLINE_BIND:
		case IIS_OFFLINE_COPIES:
			/*
			 * inetd must be closing down as we wouldn't get this
			 * event in one of these states from the master
			 * restarter. Take the instance to the offline resting
			 * state.
			 */
			if (instance->cur_istate == IIS_OFFLINE_BIND) {
				cancel_bind_timer(instance);
			} else if (instance->cur_istate ==
			    IIS_OFFLINE_CONRATE) {
				cancel_inst_timer(instance);
			}
			update_state(instance, IIS_OFFLINE, RERR_RESTART);
			goto done;
		}
		/* in any other state, stop is handled per-state below */
		break;
	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
		/*
		 * We've got a restart event, so if the instance is online
		 * in any way initiate taking it offline, and rely upon
		 * our restarter to send us an online event to bring
		 * it back online.
		 */
		switch (instance->cur_istate) {
		case IIS_ONLINE:
		case IIS_DEGRADED:
			destroy_bound_fds(instance);
			(void) run_method(instance, IM_OFFLINE, NULL);
		}
		goto done;
	}

	/* the remaining events are handled according to the current state */
	switch (instance->cur_istate) {
	case IIS_OFFLINE:
		switch (event) {
		case RESTARTER_EVENT_TYPE_START:
			/*
			 * Dependencies are met, let's take the service online.
			 * Only try and bind for a wait type service if
			 * no process is running on its behalf. Otherwise, just
			 * mark the service online and binding will be attempted
			 * when the process exits.
			 */
			if (!(instance->config->basic->iswait &&
			    (uu_list_first(instance->start_pids) != NULL))) {
				create_bound_fds(instance);
			} else {
				update_state(instance, IIS_ONLINE, RERR_NONE);
			}
			break;
		case RESTARTER_EVENT_TYPE_DISABLE:
		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			/*
			 * The instance should be disabled, so run the
			 * instance's disabled method that will do the work
			 * to take it there.
			 */
			(void) run_method(instance, IM_DISABLE, NULL);
			break;
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			/*
			 * The master restarter has requested the instance
			 * go to maintenance; since we're already offline
			 * just update the state to the maintenance state.
			 */
			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
			break;
		}
		break;

	case IIS_OFFLINE_BIND:
		switch (event) {
		case RESTARTER_EVENT_TYPE_DISABLE:
		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			/*
			 * The instance should be disabled. Firstly, cancel
			 * the bind retry timer and update the state to
			 * offline. Then, run the disable method to do the
			 * work to take the instance from offline to
			 * disabled.
			 */
			cancel_bind_timer(instance);
			update_state(instance, IIS_OFFLINE, RERR_RESTART);
			(void) run_method(instance, IM_DISABLE, NULL);
			break;
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			/*
			 * The master restarter has requested the instance
			 * be placed in the maintenance state. Cancel the
			 * outstanding retry timer, and since we're already
			 * offline, update the state to maintenance.
			 */
			cancel_bind_timer(instance);
			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
			break;
		}
		break;

	case IIS_DEGRADED:
	case IIS_ONLINE:
		switch (event) {
		case RESTARTER_EVENT_TYPE_DISABLE:
		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			/*
			 * The instance needs to be disabled. Do the same work
			 * as for the dependencies unmet (stop) event below to
			 * take the instance offline.
			 */
			destroy_bound_fds(instance);
			/*
			 * Indicate that the offline method is being run
			 * as part of going to the disabled state, and to
			 * carry on this transition.
			 */
			instance->disable_req = B_TRUE;
			(void) run_method(instance, IM_OFFLINE, NULL);
			break;
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			/*
			 * The master restarter has requested the instance be
			 * placed in the maintenance state. This involves
			 * firstly taking the service offline, so do the
			 * same work as for the dependencies unmet event
			 * below. We set the maintenance_req flag to
			 * indicate that when we get to the offline state
			 * we should be placed directly into the maintenance
			 * state.
			 */
			instance->maintenance_req = B_TRUE;
			/* FALLTHROUGH */
		case RESTARTER_EVENT_TYPE_STOP:
			/*
			 * Dependencies have become unmet. Close and
			 * stop listening on the instance's network file
			 * descriptor, and run the offline method to do
			 * any work required to take us to the offline state.
			 */
			destroy_bound_fds(instance);
			(void) run_method(instance, IM_OFFLINE, NULL);
		}
		break;

	case IIS_UNINITIALIZED:
		if (event == RESTARTER_EVENT_TYPE_DISABLE ||
		    event == RESTARTER_EVENT_TYPE_ADMIN_DISABLE) {
			update_state(instance, IIS_DISABLED, RERR_NONE);
			break;
		} else if (event != RESTARTER_EVENT_TYPE_ENABLE) {
			/*
			 * Ignore other events until we know whether we're
			 * enabled or not.
			 */
			break;
		}

		/*
		 * We've got an enabled event; make use of the handling in the
		 * disable case.
		 */
		/* FALLTHROUGH */

	case IIS_DISABLED:
		switch (event) {
		case RESTARTER_EVENT_TYPE_ENABLE:
			/*
			 * The instance needs enabling. Commence reading its
			 * configuration and if successful place the instance
			 * in the offline state and let process_offline_inst()
			 * take it from there.
			 */
			destroy_instance_cfg(instance->config);
			instance->config = read_instance_cfg(instance->fmri);
			if (instance->config != NULL) {
				update_state(instance, IIS_OFFLINE,
				    RERR_RESTART);
				process_offline_inst(instance);
			} else {
				log_invalid_cfg(instance->fmri);
				update_state(instance, IIS_MAINTENANCE,
				    RERR_RESTART);
			}

			break;
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			/*
			 * The master restarter has requested the instance be
			 * placed in the maintenance state, so just update its
			 * state to maintenance.
			 */
			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
			break;
		}
		break;

	case IIS_MAINTENANCE:
		/*
		 * NOTE(review): only the ADMIN_DISABLE flavour of disable is
		 * handled in this state - confirm that a plain DISABLE event
		 * is meant to be ignored here.
		 */
		switch (event) {
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
			/*
			 * The master restarter has requested that the instance
			 * be taken out of maintenance. Read its configuration,
			 * and if successful place the instance in the offline
			 * state and call process_offline_inst() to take it
			 * from there.
			 */
			destroy_instance_cfg(instance->config);
			instance->config = read_instance_cfg(instance->fmri);
			if (instance->config != NULL) {
				update_state(instance, IIS_OFFLINE,
				    RERR_RESTART);
				process_offline_inst(instance);
			} else {
				boolean_t enabled;

				/*
				 * The configuration was invalid. If the
				 * service has disabled requested, let's
				 * just place the instance in disabled even
				 * though we haven't been able to run its
				 * disable method, as the slightly incorrect
				 * state is likely to be less of an issue to
				 * an administrator than refusing to move an
				 * instance to disabled. If disable isn't
				 * requested, re-mark the service's state
				 * as maintenance, so the administrator can
				 * see the request was processed.
				 */
				if ((read_enable_merged(instance->fmri,
				    &enabled) == 0) && !enabled) {
					update_state(instance, IIS_DISABLED,
					    RERR_RESTART);
				} else {
					log_invalid_cfg(instance->fmri);
					update_state(instance, IIS_MAINTENANCE,
					    RERR_FAULT);
				}
			}
			break;
		}
		break;

	case IIS_OFFLINE_CONRATE:
		/*
		 * NOTE(review): unlike the offline, offline-bind and online
		 * states, ADMIN_DISABLE isn't handled here alongside DISABLE -
		 * confirm whether that is intentional.
		 */
		switch (event) {
		case RESTARTER_EVENT_TYPE_DISABLE:
			/*
			 * The instance wants disabling. Cancel the timer
			 * setup when we entered this state and take the
			 * instance offline, and then from there run the
			 * disable method to do the work to take the
			 * instance to the disabled state.
			 */
			cancel_inst_timer(instance);
			update_state(instance, IIS_OFFLINE, RERR_RESTART);
			(void) run_method(instance, IM_DISABLE, NULL);
			break;
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			/*
			 * The master restarter has requested the instance
			 * be taken to maintenance. Cancel the timer setup
			 * when we entered this state, and go directly to
			 * maintenance.
			 */
			cancel_inst_timer(instance);
			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
			break;
		}
		break;

	case IIS_OFFLINE_COPIES:
		/*
		 * NOTE(review): as for the connection-rate offline state,
		 * ADMIN_DISABLE isn't handled here - confirm intent.
		 */
		switch (event) {
		case RESTARTER_EVENT_TYPE_DISABLE:
			/*
			 * The instance wants disabling. Update the state
			 * to offline, and run the disable method to do the
			 * work to take it to the disabled state.
			 */
			update_state(instance, IIS_OFFLINE, RERR_RESTART);
			(void) run_method(instance, IM_DISABLE, NULL);
			break;
		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
			/*
			 * The master restarter has requested the instance be
			 * placed in maintenance. Since it's already offline
			 * simply update the state.
			 */
			update_state(instance, IIS_MAINTENANCE, RERR_RESTART);
			break;
		}
		break;

	default:
		debug_msg("handle_restarter_event: instance in an "
		    "unexpected state");
		assert(0);
	}

done:
	/* 'instance' may have been freed above; only send_ack is used here */
	if (send_ack)
		ack_restarter_event(B_TRUE);
}
1979
1980/*
1981 * Tries to read and process an event from the event pipe. If there isn't one
1982 * or an error occurred processing the event it returns -1. Else, if the event
1983 * is for an instance we're not already managing we read its state, add it to
1984 * our list to manage, and if appropriate read its configuration. Whether it's
1985 * new to us or not, we then handle the specific event.
1986 * Returns 0 if an event was read and processed successfully, else -1.
1987 */
1988static int
1989process_restarter_event(void)
1990{
1991	char			*fmri;
1992	size_t			fmri_size;
1993	restarter_event_type_t  event_type;
1994	instance_t		*instance;
1995	restarter_event_t	*event;
1996	ssize_t			sz;
1997
1998	debug_msg("Entering process_restarter_event");
1999
2000	/*
2001	 * Try to read an event pointer from the event pipe.
2002	 */
2003	errno = 0;
2004	switch (safe_read(rst_event_pipe[PE_CONSUMER], &event,
2005	    sizeof (event))) {
2006	case 0:
2007		break;
2008	case  1:
2009		if (errno == EAGAIN)	/* no event to read */
2010			return (-1);
2011
2012		/* other end of pipe closed */
2013
2014		/* FALLTHROUGH */
2015	default:			/* unexpected read error */
2016		/*
2017		 * There's something wrong with the event pipe. Let's
2018		 * shutdown and be restarted.
2019		 */
2020		inetd_stop();
2021		return (-1);
2022	}
2023
2024	/*
2025	 * Check if we're currently managing the instance which the event
2026	 * pertains to. If not, read its complete state and add it to our
2027	 * list to manage.
2028	 */
2029
2030	fmri_size = scf_limit(SCF_LIMIT_MAX_FMRI_LENGTH);
2031	if ((fmri = malloc(fmri_size)) == NULL) {
2032		error_msg(strerror(errno));
2033		goto fail;
2034	}
2035	sz = restarter_event_get_instance(event, fmri, fmri_size);
2036	if (sz >= fmri_size)
2037		assert(0);
2038
2039	for (instance = uu_list_first(instance_list); instance != NULL;
2040	    instance = uu_list_next(instance_list, instance)) {
2041		if (strcmp(instance->fmri, fmri) == 0)
2042			break;
2043	}
2044
2045	if (instance == NULL) {
2046		int err;
2047
2048		debug_msg("New instance to manage: %s", fmri);
2049
2050		if (((instance = create_instance(fmri)) == NULL) ||
2051		    (retrieve_instance_state(instance) != 0) ||
2052		    (retrieve_method_pids(instance) != 0)) {
2053			destroy_instance(instance);
2054			free(fmri);
2055			goto fail;
2056		}
2057
2058		if (((err = iterate_repository_contracts(instance, 0))
2059		    != 0) && (err != ENOENT)) {
2060			error_msg(gettext(
2061			    "Failed to adopt contracts of instance %s: %s"),
2062			    instance->fmri, strerror(err));
2063			destroy_instance(instance);
2064			free(fmri);
2065			goto fail;
2066		}
2067
2068		uu_list_node_init(instance, &instance->link, instance_pool);
2069		(void) uu_list_insert_after(instance_list, NULL, instance);
2070
2071		/*
2072		 * Only read configuration for instances that aren't in any of
2073		 * the disabled, maintenance or uninitialized states, since
2074		 * they'll read it on state exit.
2075		 */
2076		if ((instance->cur_istate != IIS_DISABLED) &&
2077		    (instance->cur_istate != IIS_MAINTENANCE) &&
2078		    (instance->cur_istate != IIS_UNINITIALIZED)) {
2079			instance->config = read_instance_cfg(instance->fmri);
2080			if (instance->config == NULL) {
2081				log_invalid_cfg(instance->fmri);
2082				update_state(instance, IIS_MAINTENANCE,
2083				    RERR_FAULT);
2084			}
2085		}
2086	}
2087
2088	free(fmri);
2089
2090	event_type = restarter_event_get_type(event);
2091	debug_msg("Event type: %d for instance: %s", event_type,
2092	    instance->fmri);
2093
2094	/*
2095	 * If the instance is currently running a method, don't process the
2096	 * event now, but attach it to the instance for processing when
2097	 * the instance finishes its transition.
2098	 */
2099	if (INST_IN_TRANSITION(instance)) {
2100		debug_msg("storing event %d for instance %s", event_type,
2101		    instance->fmri);
2102		instance->pending_rst_event = event_type;
2103	} else {
2104		handle_restarter_event(instance, event_type, B_TRUE);
2105	}
2106
2107	return (0);
2108
2109fail:
2110	ack_restarter_event(B_FALSE);
2111	return (-1);
2112}
2113
2114/*
2115 * Do the state machine processing associated with the termination of instance
2116 * 'inst''s start method.
2117 */
2118void
2119process_start_term(instance_t *inst)
2120{
2121	basic_cfg_t	*cfg;
2122
2123	debug_msg("Entering process_start_term: inst: %s", inst->fmri);
2124
2125	inst->copies--;
2126
2127	if ((inst->cur_istate == IIS_MAINTENANCE) ||
2128	    (inst->cur_istate == IIS_DISABLED)) {
2129		/* do any further processing/checks when we exit these states */
2130		return;
2131	}
2132
2133	cfg = inst->config->basic;
2134
2135	if (cfg->iswait) {
2136		proto_info_t	*pi;
2137
2138		switch (inst->cur_istate) {
2139		case IIS_ONLINE:
2140		case IIS_DEGRADED:
2141		case IIS_IN_REFRESH_METHOD:
2142			/*
2143			 * A wait type service's start method has exited.
2144			 * Check if the method was fired off in this inetd's
2145			 * lifetime, or a previous one; if the former,
2146			 * re-commence listening on the service's behalf; if
2147			 * the latter, mark the service offline and let bind
2148			 * attempts commence.
2149			 */
2150			for (pi = uu_list_first(cfg->proto_list); pi != NULL;
2151			    pi = uu_list_next(cfg->proto_list, pi)) {
2152				/*
2153				 * If a bound fd exists, the method was fired
2154				 * off during this inetd's lifetime.
2155				 */
2156				if (pi->listen_fd != -1)
2157					break;
2158			}
2159			if (pi != NULL) {
2160				if (poll_bound_fds(inst, B_TRUE) != 0)
2161					handle_bind_failure(inst);
2162			} else {
2163				update_state(inst, IIS_OFFLINE, RERR_RESTART);
2164				create_bound_fds(inst);
2165			}
2166		}
2167	} else {
2168		/*
2169		 * Check if a nowait service should be brought back online
2170		 * after exceeding its copies limit.
2171		 */
2172		if ((inst->cur_istate == IIS_OFFLINE_COPIES) &&
2173		    !copies_limit_exceeded(inst)) {
2174			update_state(inst, IIS_OFFLINE, RERR_NONE);
2175			process_offline_inst(inst);
2176		}
2177	}
2178}
2179
2180/*
2181 * If the instance has a pending event process it and initiate the
2182 * acknowledgement.
2183 */
2184static void
2185process_pending_rst_event(instance_t *inst)
2186{
2187	if (inst->pending_rst_event != RESTARTER_EVENT_TYPE_INVALID) {
2188		restarter_event_type_t re;
2189
2190		debug_msg("Injecting pending event %d for instance %s",
2191		    inst->pending_rst_event, inst->fmri);
2192		re = inst->pending_rst_event;
2193		inst->pending_rst_event = RESTARTER_EVENT_TYPE_INVALID;
2194		handle_restarter_event(inst, re, B_TRUE);
2195	}
2196}
2197
2198/*
2199 * Do the state machine processing associated with the termination
2200 * of the specified instance's non-start method with the specified status.
2201 * Once the processing of the termination is done, the function also picks up
2202 * any processing that was blocked on the method running.
2203 */
2204void
2205process_non_start_term(instance_t *inst, int status)
2206{
2207	boolean_t ran_online_method = B_FALSE;
2208
2209	debug_msg("Entering process_non_start_term: inst: %s, method: %s",
2210	    inst->fmri, methods[states[inst->cur_istate].method_running].name);
2211
2212	if (status == IMRET_FAILURE) {
2213		error_msg(gettext("The %s method of instance %s failed, "
2214		    "transitioning to maintenance"),
2215		    methods[states[inst->cur_istate].method_running].name,
2216		    inst->fmri);
2217
2218		if ((inst->cur_istate == IIS_IN_ONLINE_METHOD) ||
2219		    (inst->cur_istate == IIS_IN_REFRESH_METHOD))
2220			destroy_bound_fds(inst);
2221
2222		update_state(inst, IIS_MAINTENANCE, RERR_FAULT);
2223
2224		inst->maintenance_req = B_FALSE;
2225		inst->conn_rate_exceeded = B_FALSE;
2226
2227		if (inst->new_config != NULL) {
2228			destroy_instance_cfg(inst->new_config);
2229			inst->new_config = NULL;
2230		}
2231
2232		if (!inetd_stopping)
2233			process_pending_rst_event(inst);
2234
2235		return;
2236	}
2237
2238	/* non-failure method return */
2239
2240	if (status != IMRET_SUCCESS) {
2241		/*
2242		 * An instance method never returned a supported return code.
2243		 * We'll assume this means the method succeeded for now whilst
2244		 * non-GL-cognizant methods are used - eg. pkill.
2245		 */
2246		debug_msg("The %s method of instance %s returned "
2247		    "non-compliant exit code: %d, assuming success",
2248		    methods[states[inst->cur_istate].method_running].name,
2249		    inst->fmri, status);
2250	}
2251
2252	/*
2253	 * Update the state from the in-transition state.
2254	 */
2255	switch (inst->cur_istate) {
2256	case IIS_IN_ONLINE_METHOD:
2257		ran_online_method = B_TRUE;
2258		/* FALLTHROUGH */
2259	case IIS_IN_REFRESH_METHOD:
2260		/*
2261		 * If we've exhausted the bind retries, flag that by setting
2262		 * the instance's state to degraded.
2263		 */
2264		if (inst->bind_retries_exceeded) {
2265			update_state(inst, IIS_DEGRADED, RERR_NONE);
2266			break;
2267		}
2268		/* FALLTHROUGH */
2269	default:
2270		update_state(inst,
2271		    methods[states[inst->cur_istate].method_running].dst_state,
2272		    RERR_NONE);
2273	}
2274
2275	if (inst->cur_istate == IIS_OFFLINE) {
2276		if (inst->new_config != NULL) {
2277			/*
2278			 * This instance was found during refresh to need
2279			 * taking offline because its newly read configuration
2280			 * was sufficiently different. Now we're offline,
2281			 * activate this new configuration.
2282			 */
2283			destroy_instance_cfg(inst->config);
2284			inst->config = inst->new_config;
2285			inst->new_config = NULL;
2286		}
2287
2288		/* continue/complete any transitions that are in progress */
2289		process_offline_inst(inst);
2290
2291	} else if (ran_online_method) {
2292		/*
2293		 * We've just successfully executed the online method. We have
2294		 * a set of bound network fds that were created before running
2295		 * this method, so now we're online start listening for
2296		 * connections on them.
2297		 */
2298		if (poll_bound_fds(inst, B_TRUE) != 0)
2299			handle_bind_failure(inst);
2300	}
2301
2302	/*
2303	 * If we're now out of transition (process_offline_inst() could have
2304	 * fired off another method), carry out any jobs that were blocked by
2305	 * us being in transition.
2306	 */
2307	if (!INST_IN_TRANSITION(inst)) {
2308		if (inetd_stopping) {
2309			if (!instance_stopped(inst)) {
2310				/*
2311				 * inetd is stopping, and this instance hasn't
2312				 * been stopped. Inject a stop event.
2313				 */
2314				handle_restarter_event(inst,
2315				    RESTARTER_EVENT_TYPE_STOP, B_FALSE);
2316			}
2317		} else {
2318			process_pending_rst_event(inst);
2319		}
2320	}
2321}
2322
2323/*
2324 * Check if configuration file specified is readable. If not return B_FALSE,
2325 * else return B_TRUE.
2326 */
2327static boolean_t
2328can_read_file(const char *path)
2329{
2330	int	ret;
2331	int	serrno;
2332
2333	debug_msg("Entering can_read_file");
2334	do {
2335		ret = access(path, R_OK);
2336	} while ((ret < 0) && (errno == EINTR));
2337	if (ret < 0) {
2338		if (errno != ENOENT) {
2339			serrno = errno;
2340			error_msg(gettext("Failed to access configuration "
2341			    "file %s for performing modification checks: %s"),
2342			    path, strerror(errno));
2343			errno = serrno;
2344		}
2345		return (B_FALSE);
2346	}
2347	return (B_TRUE);
2348}
2349
2350/*
2351 * Check whether the configuration file has changed contents since inetd
2352 * was last started/refreshed, and if so, log a message indicating that
2353 * inetconv needs to be run.
2354 */
2355static void
2356check_conf_file(void)
2357{
2358	char		*new_hash;
2359	char		*old_hash = NULL;
2360	scf_error_t	ret;
2361	const char	*file;
2362
2363	debug_msg("Entering check_conf_file");
2364
2365	if (conf_file == NULL) {
2366		/*
2367		 * No explicit config file specified, so see if one of the
2368		 * default two are readable, checking the primary one first
2369		 * followed by the secondary.
2370		 */
2371		if (can_read_file(PRIMARY_DEFAULT_CONF_FILE)) {
2372			file = PRIMARY_DEFAULT_CONF_FILE;
2373		} else if ((errno == ENOENT) &&
2374		    can_read_file(SECONDARY_DEFAULT_CONF_FILE)) {
2375			file = SECONDARY_DEFAULT_CONF_FILE;
2376		} else {
2377			return;
2378		}
2379	} else {
2380		file = conf_file;
2381		if (!can_read_file(file))
2382			return;
2383	}
2384
2385	if (calculate_hash(file, &new_hash) == 0) {
2386		ret = retrieve_inetd_hash(&old_hash);
2387		if (((ret == SCF_ERROR_NONE) &&
2388		    (strcmp(old_hash, new_hash) != 0))) {
2389			/* modified config file */
2390			warn_msg(gettext(
2391			    "Configuration file %s has been modified since "
2392			    "inetconv was last run. \"inetconv -i %s\" must be "
2393			    "run to apply any changes to the SMF"), file, file);
2394		} else if ((ret != SCF_ERROR_NOT_FOUND) &&
2395		    (ret != SCF_ERROR_NONE)) {
2396			/* No message if hash not yet computed */
2397			error_msg(gettext("Failed to check whether "
2398			    "configuration file %s has been modified: %s"),
2399			    file, scf_strerror(ret));
2400		}
2401		free(old_hash);
2402		free(new_hash);
2403	} else {
2404		error_msg(gettext("Failed to check whether configuration file "
2405		    "%s has been modified: %s"), file, strerror(errno));
2406	}
2407}
2408
2409/*
2410 * Refresh all inetd's managed instances and check the configuration file
2411 * for any updates since inetconv was last run, logging a message if there
2412 * are. We call the SMF refresh function to refresh each instance so that
2413 * the refresh request goes through the framework, and thus results in the
2414 * running snapshot of each instance being updated from the configuration
2415 * snapshot.
2416 */
2417static void
2418inetd_refresh(void)
2419{
2420	instance_t	*inst;
2421
2422	debug_msg("Entering inetd_refresh");
2423
2424	/* call libscf to send refresh requests for all managed instances */
2425	for (inst = uu_list_first(instance_list); inst != NULL;
2426	    inst = uu_list_next(instance_list, inst)) {
2427		if (smf_refresh_instance(inst->fmri) < 0) {
2428			error_msg(gettext("Failed to refresh instance %s: %s"),
2429			    inst->fmri, scf_strerror(scf_error()));
2430		}
2431	}
2432
2433	/*
2434	 * Log a message if the configuration file has changed since inetconv
2435	 * was last run.
2436	 */
2437	check_conf_file();
2438}
2439
2440/*
2441 * Initiate inetd's shutdown.
2442 */
2443static void
2444inetd_stop(void)
2445{
2446	instance_t *inst;
2447
2448	debug_msg("Entering inetd_stop");
2449
2450	/* Block handling signals for stop and refresh */
2451	(void) sighold(SIGHUP);
2452	(void) sighold(SIGTERM);
2453
2454	/* Indicate inetd is coming down */
2455	inetd_stopping = B_TRUE;
2456
2457	/* Stop polling on restarter events. */
2458	clear_pollfd(rst_event_pipe[PE_CONSUMER]);
2459
2460	/* Stop polling for any more stop/refresh requests. */
2461	clear_pollfd(uds_fd);
2462
2463	/*
2464	 * Send a stop event to all currently unstopped instances that
2465	 * aren't in transition. For those that are in transition, the
2466	 * event will get sent when the transition completes.
2467	 */
2468	for (inst = uu_list_first(instance_list); inst != NULL;
2469	    inst = uu_list_next(instance_list, inst)) {
2470		if (!instance_stopped(inst) && !INST_IN_TRANSITION(inst))
2471			handle_restarter_event(inst,
2472			    RESTARTER_EVENT_TYPE_STOP, B_FALSE);
2473	}
2474}
2475
2476/*
2477 * Sets up the intra-inetd-process Unix Domain Socket.
2478 * Returns -1 on error, else 0.
2479 */
2480static int
2481uds_init(void)
2482{
2483	struct sockaddr_un addr;
2484
2485	debug_msg("Entering uds_init");
2486
2487	if ((uds_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
2488		error_msg("socket: %s", strerror(errno));
2489		return (-1);
2490	}
2491
2492	disable_blocking(uds_fd);
2493
2494	(void) unlink(INETD_UDS_PATH);  /* clean-up any stale files */
2495
2496	(void) memset(&addr, 0, sizeof (addr));
2497	addr.sun_family = AF_UNIX;
2498	/* CONSTCOND */
2499	assert(sizeof (INETD_UDS_PATH) <= sizeof (addr.sun_path));
2500	(void) strlcpy(addr.sun_path, INETD_UDS_PATH, sizeof (addr.sun_path));
2501
2502	if (bind(uds_fd, (struct sockaddr *)(&addr), sizeof (addr)) < 0) {
2503		error_msg(gettext("Failed to bind socket to %s: %s"),
2504		    INETD_UDS_PATH, strerror(errno));
2505		(void) close(uds_fd);
2506		return (-1);
2507	}
2508
2509	(void) listen(uds_fd, UDS_BACKLOG);
2510
2511	if ((set_pollfd(uds_fd, POLLIN)) == -1) {
2512		(void) close(uds_fd);
2513		(void) unlink(INETD_UDS_PATH);
2514		return (-1);
2515	}
2516
2517	return (0);
2518}
2519
2520static void
2521uds_fini(void)
2522{
2523	if (uds_fd != -1)
2524		(void) close(uds_fd);
2525	(void) unlink(INETD_UDS_PATH);
2526}
2527
2528/*
2529 * Handle an incoming request on the Unix Domain Socket. Returns -1 if there
2530 * was an error handling the event, else 0.
2531 */
2532static int
2533process_uds_event(void)
2534{
2535	uds_request_t		req;
2536	int			fd;
2537	struct sockaddr_un	addr;
2538	socklen_t		len = sizeof (addr);
2539	int			ret;
2540	uint_t			retries = 0;
2541
2542	debug_msg("Entering process_uds_event");
2543
2544	do {
2545		fd = accept(uds_fd, (struct sockaddr *)&addr, &len);
2546	} while ((fd < 0) && (errno == EINTR));
2547	if (fd < 0) {
2548		if (errno != EWOULDBLOCK)
2549			error_msg("accept failed: %s", strerror(errno));
2550		return (-1);
2551	}
2552
2553	for (retries = 0; retries < UDS_RECV_RETRIES; retries++) {
2554		if (((ret = safe_read(fd, &req, sizeof (req))) != 1) ||
2555		    (errno != EAGAIN))
2556			break;
2557
2558		(void) poll(NULL, 0, 100);	/* 100ms pause */
2559	}
2560
2561	if (ret != 0) {
2562		error_msg(gettext("Failed read: %s"), strerror(errno));
2563		(void) close(fd);
2564		return (-1);
2565	}
2566
2567	switch (req) {
2568	case UR_REFRESH_INETD:
2569		/* flag the request for event_loop() to process */
2570		refresh_inetd_requested = B_TRUE;
2571		(void) close(fd);
2572		break;
2573	case UR_STOP_INETD:
2574		inetd_stop();
2575		break;
2576	default:
2577		error_msg("unexpected UDS request");
2578		(void) close(fd);
2579		return (-1);
2580	}
2581
2582	return (0);
2583}
2584
2585/*
2586 * Perform checks for common exec string errors. We limit the checks to
2587 * whether the file exists, is a regular file, and has at least one execute
2588 * bit set. We leave the core security checks to exec() so as not to duplicate
2589 * and thus incur the associated drawbacks, but hope to catch the common
2590 * errors here.
2591 */
2592static boolean_t
2593passes_basic_exec_checks(const char *instance, const char *method,
2594    const char *path)
2595{
2596	struct stat	sbuf;
2597
2598	debug_msg("Entering passes_basic_exec_checks");
2599
2600	/* check the file exists */
2601	while (stat(path, &sbuf) == -1) {
2602		if (errno != EINTR) {
2603			error_msg(gettext(
2604			    "Can't stat the %s method of instance %s: %s"),
2605			    method, instance, strerror(errno));
2606			return (B_FALSE);
2607		}
2608	}
2609
2610	/*
2611	 * Check if the file is a regular file and has at least one execute
2612	 * bit set.
2613	 */
2614	if ((sbuf.st_mode & S_IFMT) != S_IFREG) {
2615		error_msg(gettext(
2616		    "The %s method of instance %s isn't a regular file"),
2617		    method, instance);
2618		return (B_FALSE);
2619	} else if ((sbuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
2620		error_msg(gettext("The %s method instance %s doesn't have "
2621		    "any execute permissions set"), method, instance);
2622		return (B_FALSE);
2623	}
2624
2625	return (B_TRUE);
2626}
2627
2628static void
2629exec_method(instance_t *instance, instance_method_t method, method_info_t *mi,
2630    struct method_context *mthd_ctxt, const proto_info_t *pi)
2631{
2632	char		**args;
2633	char 		**env;
2634	const char	*errf;
2635	int		serrno;
2636	basic_cfg_t	*cfg = instance->config->basic;
2637
2638	if (method == IM_START) {
2639		/*
2640		 * If wrappers checks fail, pretend the method was exec'd and
2641		 * failed.
2642		 */
2643		if (!tcp_wrappers_ok(instance))
2644			exit(IMRET_FAILURE);
2645	}
2646
2647	/*
2648	 * Revert the disposition of handled signals and ignored signals to
2649	 * their defaults, unblocking any blocked ones as a side effect.
2650	 */
2651	(void) sigset(SIGHUP, SIG_DFL);
2652	(void) sigset(SIGTERM, SIG_DFL);
2653	(void) sigset(SIGINT, SIG_DFL);
2654
2655	/*
2656	 * Setup exec arguments. Do this before the fd setup below, so our
2657	 * logging related file fd doesn't get taken over before we call
2658	 * expand_address().
2659	 */
2660	if ((method == IM_START) &&
2661	    (strcmp(mi->exec_args_we.we_wordv[0], "%A") == 0)) {
2662		args = expand_address(instance, pi);
2663	} else {
2664		args = mi->exec_args_we.we_wordv;
2665	}
2666
2667	/* Generate audit trail for start operations */
2668	if (method == IM_START) {
2669		adt_event_data_t *ae;
2670		struct sockaddr_storage ss;
2671		priv_set_t *privset;
2672		socklen_t sslen = sizeof (ss);
2673
2674		if ((ae = adt_alloc_event(audit_handle, ADT_inetd_connect))
2675		    == NULL) {
2676			error_msg(gettext("Unable to allocate audit event for "
2677			    "the %s method of instance %s"),
2678			    methods[method].name, instance->fmri);
2679			exit(IMRET_FAILURE);
2680		}
2681
2682		/*
2683		 * The inetd_connect audit record consists of:
2684		 *	Service name
2685		 *	Execution path
2686		 *	Remote address and port
2687		 *	Local port
2688		 *	Process privileges
2689		 */
2690		ae->adt_inetd_connect.service_name = cfg->svc_name;
2691		ae->adt_inetd_connect.cmd = mi->exec_path;
2692
2693		if (instance->remote_addr.ss_family == AF_INET) {
2694			struct in_addr *in = SS_SINADDR(instance->remote_addr);
2695			ae->adt_inetd_connect.ip_adr[0] = in->s_addr;
2696			ae->adt_inetd_connect.ip_type = ADT_IPv4;
2697		} else {
2698			uint32_t *addr6;
2699			int i;
2700
2701			ae->adt_inetd_connect.ip_type = ADT_IPv6;
2702			addr6 = (uint32_t *)SS_SINADDR(instance->remote_addr);
2703			for (i = 0; i < 4; ++i)
2704				ae->adt_inetd_connect.ip_adr[i] = addr6[i];
2705		}
2706
2707		ae->adt_inetd_connect.ip_remote_port =
2708		    ntohs(SS_PORT(instance->remote_addr));
2709
2710		if (getsockname(instance->conn_fd, (struct sockaddr *)&ss,
2711		    &sslen) == 0)
2712			ae->adt_inetd_connect.ip_local_port =
2713			    ntohs(SS_PORT(ss));
2714
2715		privset = mthd_ctxt->priv_set;
2716		if (privset == NULL) {
2717			privset = priv_allocset();
2718			if (privset != NULL &&
2719			    getppriv(PRIV_EFFECTIVE, privset) != 0) {
2720				priv_freeset(privset);
2721				privset = NULL;
2722			}
2723		}
2724
2725		ae->adt_inetd_connect.privileges = privset;
2726
2727		(void) adt_put_event(ae, ADT_SUCCESS, ADT_SUCCESS);
2728		adt_free_event(ae);
2729
2730		if (privset != NULL && mthd_ctxt->priv_set == NULL)
2731			priv_freeset(privset);
2732	}
2733
2734	/*
2735	 * Set method context before the fd setup below so we can output an
2736	 * error message if it fails.
2737	 */
2738	if ((errno = restarter_set_method_context(mthd_ctxt, &errf)) != 0) {
2739		const char *msg;
2740
2741		if (errno == -1) {
2742			if (strcmp(errf, "core_set_process_path") == 0) {
2743				msg = gettext("Failed to set the corefile path "
2744				    "for the %s method of instance %s");
2745			} else if (strcmp(errf, "setproject") == 0) {
2746				msg = gettext("Failed to assign a resource "
2747				    "control for the %s method of instance %s");
2748			} else if (strcmp(errf, "pool_set_binding") == 0) {
2749				msg = gettext("Failed to bind the %s method of "
2750				    "instance %s to a pool due to a system "
2751				    "error");
2752			} else {
2753				assert(0);
2754				abort();
2755			}
2756
2757			error_msg(msg, methods[method].name, instance->fmri);
2758
2759			exit(IMRET_FAILURE);
2760		}
2761
2762		if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) {
2763			switch (errno) {
2764			case ENOENT:
2765				msg = gettext("Failed to find resource pool "
2766				    "for the %s method of instance %s");
2767				break;
2768
2769			case EBADF:
2770				msg = gettext("Failed to bind the %s method of "
2771				    "instance %s to a pool due to invalid "
2772				    "configuration");
2773				break;
2774
2775			case EINVAL:
2776				msg = gettext("Failed to bind the %s method of "
2777				    "instance %s to a pool due to invalid "
2778				    "pool name");
2779				break;
2780
2781			default:
2782				assert(0);
2783				abort();
2784			}
2785
2786			exit(IMRET_FAILURE);
2787		}
2788
2789		if (errf != NULL) {
2790			error_msg(gettext("Failed to set credentials for the "
2791			    "%s method of instance %s (%s: %s)"),
2792			    methods[method].name, instance->fmri, errf,
2793			    strerror(errno));
2794			exit(IMRET_FAILURE);
2795		}
2796
2797		switch (errno) {
2798		case ENOMEM:
2799			msg = gettext("Failed to set credentials for the %s "
2800			    "method of instance %s (out of memory)");
2801			break;
2802
2803		case ENOENT:
2804			msg = gettext("Failed to set credentials for the %s "
2805			    "method of instance %s (no passwd or shadow "
2806			    "entry for user)");
2807			break;
2808
2809		default:
2810			assert(0);
2811			abort();
2812		}
2813
2814		error_msg(msg, methods[method].name, instance->fmri);
2815		exit(IMRET_FAILURE);
2816	}
2817
2818	/* let exec() free mthd_ctxt */
2819
2820	/* setup standard fds */
2821	if (method == IM_START) {
2822		(void) dup2(instance->conn_fd, STDIN_FILENO);
2823	} else {
2824		(void) close(STDIN_FILENO);
2825		(void) open("/dev/null", O_RDONLY);
2826	}
2827	(void) dup2(STDIN_FILENO, STDOUT_FILENO);
2828	(void) dup2(STDIN_FILENO, STDERR_FILENO);
2829
2830	closefrom(STDERR_FILENO + 1);
2831
2832	method_preexec();
2833
2834	env = set_smf_env(mthd_ctxt, instance, methods[method].name);
2835
2836	if (env != NULL) {
2837		do {
2838			(void) execve(mi->exec_path, args, env);
2839		} while (errno == EINTR);
2840	}
2841
2842	serrno = errno;
2843	/* start up logging again to report the error */
2844	msg_init();
2845	errno = serrno;
2846
2847	error_msg(
2848	    gettext("Failed to exec %s method of instance %s: %s"),
2849	    methods[method].name, instance->fmri, strerror(errno));
2850
2851	if ((method == IM_START) && (instance->config->basic->iswait)) {
2852		/*
2853		 * We couldn't exec the start method for a wait type service.
2854		 * Eat up data from the endpoint, so that hopefully the
2855		 * service's fd won't wake poll up on the next time round
2856		 * event_loop(). This behavior is carried over from the old
2857		 * inetd, and it seems somewhat arbitrary that it isn't
2858		 * also done in the case of fork failures; but I guess
2859		 * it assumes an exec failure is less likely to be the result
2860		 * of a resource shortage, and is thus not worth retrying.
2861		 */
2862		consume_wait_data(instance, 0);
2863	}
2864
2865	exit(IMRET_FAILURE);
2866}
2867
2868static restarter_error_t
2869get_method_error_success(instance_method_t method)
2870{
2871	switch (method) {
2872	case IM_OFFLINE:
2873		return (RERR_RESTART);
2874	case IM_ONLINE:
2875		return (RERR_RESTART);
2876	case IM_DISABLE:
2877		return (RERR_RESTART);
2878	case IM_REFRESH:
2879		return (RERR_REFRESH);
2880	case IM_START:
2881		return (RERR_RESTART);
2882	}
2883	(void) fprintf(stderr, gettext("Internal fatal error in inetd.\n"));
2884
2885	abort();
2886	/* NOTREACHED */
2887}
2888
2889/*
2890 * Runs the specified method of the specified service instance.
2891 * If the method was never specified, we handle it the same as if the
2892 * method was called and returned success, carrying on any transition the
2893 * instance may be in the midst of.
2894 * If the method isn't executable in its specified profile or an error occurs
2895 * forking a process to run the method in the function returns -1.
2896 * If a method binary is successfully executed, the function switches the
2897 * instance's cur state to the method's associated 'run' state and the next
2898 * state to the methods associated next state.
2899 * Returns -1 if there's an error before forking, else 0.
2900 */
2901int
2902run_method(instance_t *instance, instance_method_t method,
2903    const proto_info_t *start_info)
2904{
2905	pid_t			child_pid;
2906	method_info_t		*mi;
2907	struct method_context	*mthd_ctxt = NULL;
2908	const char		*errstr;
2909	int			sig;
2910	int			ret;
2911	instance_cfg_t		*cfg = instance->config;
2912	ctid_t			cid;
2913	boolean_t		trans_failure = B_TRUE;
2914	int			serrno;
2915
2916	debug_msg("Entering run_method, instance: %s, method: %s",
2917	    instance->fmri, methods[method].name);
2918
2919	/*
2920	 * Don't bother updating the instance's state for the start method
2921	 * as there isn't a separate start method state.
2922	 */
2923	if (method != IM_START)
2924		update_instance_states(instance, get_method_state(method),
2925		    methods[method].dst_state,
2926		    get_method_error_success(method));
2927
2928	if ((mi = cfg->methods[method]) == NULL) {
2929		/*
2930		 * An unspecified method. Since the absence of this method
2931		 * must be valid (otherwise it would have been caught
2932		 * during configuration validation), simply pretend the method
2933		 * ran and returned success.
2934		 */
2935		process_non_start_term(instance, IMRET_SUCCESS);
2936		return (0);
2937	}
2938
2939	/* Handle special method tokens, not allowed on start */
2940	if (method != IM_START) {
2941		if (restarter_is_null_method(mi->exec_path)) {
2942			/* :true means nothing should be done */
2943			process_non_start_term(instance, IMRET_SUCCESS);
2944			return (0);
2945		}
2946
2947		if ((sig = restarter_is_kill_method(mi->exec_path)) >= 0) {
2948			/* Carry out contract assassination */
2949			ret = iterate_repository_contracts(instance, sig);
2950			/* ENOENT means we didn't find any contracts */
2951			if (ret != 0 && ret != ENOENT) {
2952				error_msg(gettext("Failed to send signal %d "
2953				    "to contracts of instance %s: %s"), sig,
2954				    instance->fmri, strerror(ret));
2955				goto prefork_failure;
2956			} else {
2957				process_non_start_term(instance, IMRET_SUCCESS);
2958				return (0);
2959			}
2960		}
2961
2962		if ((sig = restarter_is_kill_proc_method(mi->exec_path)) >= 0) {
2963			/* Carry out process assassination */
2964			rep_val_t	*rv;
2965
2966			ret = IMRET_SUCCESS;
2967			for (rv = uu_list_first(instance->start_pids);
2968			    rv != NULL;
2969			    rv = uu_list_next(instance->start_pids, rv)) {
2970				if ((kill((pid_t)rv->val, sig) != 0) &&
2971				    (errno != ESRCH)) {
2972					ret = IMRET_FAILURE;
2973					error_msg(gettext("Unable to signal "
2974					    "start process of instance %s: %s"),
2975					    instance->fmri, strerror(errno));
2976				}
2977			}
2978
2979			process_non_start_term(instance, ret);
2980			return (0);
2981		}
2982	}
2983
2984	/*
2985	 * Get the associated method context before the fork so we can
2986	 * modify the instances state if things go wrong.
2987	 */
2988	if ((mthd_ctxt = read_method_context(instance->fmri,
2989	    methods[method].name, mi->exec_path, &errstr)) == NULL) {
2990		error_msg(gettext("Failed to retrieve method context for the "
2991		    "%s method of instance %s: %s"), methods[method].name,
2992		    instance->fmri, errstr);
2993		goto prefork_failure;
2994	}
2995
2996	/*
2997	 * Perform some basic checks before we fork to limit the possibility
2998	 * of exec failures, so we can modify the instance state if necessary.
2999	 */
3000	if (!passes_basic_exec_checks(instance->fmri, methods[method].name,
3001	    mi->exec_path)) {
3002		trans_failure = B_FALSE;
3003		goto prefork_failure;
3004	}
3005
3006	if (contract_prefork() == -1)
3007		goto prefork_failure;
3008	child_pid = fork();
3009	serrno = errno;
3010	contract_postfork();
3011
3012	switch (child_pid) {
3013	case -1:
3014		error_msg(gettext(
3015		    "Unable to fork %s method of instance %s: %s"),
3016		    methods[method].name, instance->fmri, strerror(serrno));
3017		if ((serrno != EAGAIN) && (serrno != ENOMEM))
3018			trans_failure = B_FALSE;
3019		goto prefork_failure;
3020	case 0:				/* child */
3021		exec_method(instance, method, mi, mthd_ctxt, start_info);
3022		/* NOTREACHED */
3023	default:			/* parent */
3024		restarter_free_method_context(mthd_ctxt);
3025		mthd_ctxt = NULL;
3026
3027		if (get_latest_contract(&cid) < 0)
3028			cid = -1;
3029
3030		/*
3031		 * Register this method so its termination is noticed and
3032		 * the state transition this method participates in is
3033		 * continued.
3034		 */
3035		if (register_method(instance, child_pid, cid, method) != 0) {
3036			/*
3037			 * Since we will never find out about the termination
3038			 * of this method, if it's a non-start method treat
			 * it as a failure so we don't block restarter event
3040			 * processing on it whilst it languishes in a method
3041			 * running state.
3042			 */
3043			error_msg(gettext("Failed to monitor status of "
3044			    "%s method of instance %s"), methods[method].name,
3045			    instance->fmri);
3046			if (method != IM_START)
3047				process_non_start_term(instance, IMRET_FAILURE);
3048		}
3049
3050		add_method_ids(instance, child_pid, cid, method);
3051
3052		/* do tcp tracing for those nowait instances that request it */
3053		if ((method == IM_START) && cfg->basic->do_tcp_trace &&
3054		    !cfg->basic->iswait) {
3055			char buf[INET6_ADDRSTRLEN];
3056
3057			syslog(LOG_NOTICE, "%s[%d] from %s %d",
3058			    cfg->basic->svc_name, child_pid,
3059			    inet_ntop_native(instance->remote_addr.ss_family,
3060			    SS_SINADDR(instance->remote_addr), buf,
3061			    sizeof (buf)),
3062			    ntohs(SS_PORT(instance->remote_addr)));
3063		}
3064	}
3065
3066	return (0);
3067
3068prefork_failure:
3069	if (mthd_ctxt != NULL) {
3070		restarter_free_method_context(mthd_ctxt);
3071		mthd_ctxt = NULL;
3072	}
3073
3074	if (method == IM_START) {
3075		/*
3076		 * Only place a start method in maintenance if we're sure
3077		 * that the failure was non-transient.
3078		 */
3079		if (!trans_failure) {
3080			destroy_bound_fds(instance);
3081			update_state(instance, IIS_MAINTENANCE, RERR_FAULT);
3082		}
3083	} else {
3084		/* treat the failure as if the method ran and failed */
3085		process_non_start_term(instance, IMRET_FAILURE);
3086	}
3087
3088	return (-1);
3089}
3090
3091static int
3092accept_connection(instance_t *instance, proto_info_t *pi)
3093{
3094	int		fd;
3095	socklen_t	size;
3096
3097	debug_msg("Entering accept_connection");
3098
3099	if (instance->config->basic->istlx) {
3100		fd = tlx_accept(instance->fmri, (tlx_info_t *)pi,
3101		    &(instance->remote_addr));
3102	} else {
3103		size = sizeof (instance->remote_addr);
3104		fd = accept(pi->listen_fd,
3105		    (struct sockaddr *)&(instance->remote_addr), &size);
3106		if (fd < 0)
3107			error_msg("accept: %s", strerror(errno));
3108	}
3109
3110	return (fd);
3111}
3112
3113/*
3114 * Handle an incoming connection request for a nowait service.
3115 * This involves accepting the incoming connection on a new fd. Connection
3116 * rate checks are then performed, transitioning the service to the
3117 * conrate offline state if these fail. Otherwise, the service's start method
3118 * is run (performing TCP wrappers checks if applicable as we do), and on
3119 * success concurrent copies checking is done, transitioning the service to the
3120 * copies offline state if this fails.
3121 */
3122static void
3123process_nowait_request(instance_t *instance, proto_info_t *pi)
3124{
3125	basic_cfg_t		*cfg = instance->config->basic;
3126	int			ret;
3127	adt_event_data_t	*ae;
3128	char			buf[BUFSIZ];
3129
3130	debug_msg("Entering process_nowait_req");
3131
3132	/* accept nowait service connections on a new fd */
3133	if ((instance->conn_fd = accept_connection(instance, pi)) == -1) {
3134		/*
3135		 * Failed accept. Return and allow the event loop to initiate
3136		 * another attempt later if the request is still present.
3137		 */
3138		return;
3139	}
3140
3141	/*
3142	 * Limit connection rate of nowait services. If either conn_rate_max
3143	 * or conn_rate_offline are <= 0, no connection rate limit checking
3144	 * is done. If the configured rate is exceeded, the instance is taken
3145	 * to the connrate_offline state and a timer scheduled to try and
3146	 * bring the instance back online after the configured offline time.
3147	 */
3148	if ((cfg->conn_rate_max > 0) && (cfg->conn_rate_offline > 0)) {
3149		if (instance->conn_rate_count++ == 0) {
3150			instance->conn_rate_start = time(NULL);
3151		} else if (instance->conn_rate_count >
3152		    cfg->conn_rate_max) {
3153			time_t now = time(NULL);
3154
3155			if ((now - instance->conn_rate_start) > 1) {
3156				instance->conn_rate_start = now;
3157				instance->conn_rate_count = 1;
3158			} else {
3159				/* Generate audit record */
3160				if ((ae = adt_alloc_event(audit_handle,
3161				    ADT_inetd_ratelimit)) == NULL) {
3162					error_msg(gettext("Unable to allocate "
3163					    "rate limit audit event"));
3164				} else {
3165					adt_inetd_ratelimit_t *rl =
3166					    &ae->adt_inetd_ratelimit;
3167					/*
3168					 * The inetd_ratelimit audit
3169					 * record consists of:
3170					 * 	Service name
3171					 *	Connection rate limit
3172					 */
3173					rl->service_name = cfg->svc_name;
3174					(void) snprintf(buf, sizeof (buf),
3175					    "limit=%lld", cfg->conn_rate_max);
3176					rl->limit = buf;
3177					(void) adt_put_event(ae, ADT_SUCCESS,
3178					    ADT_SUCCESS);
3179					adt_free_event(ae);
3180				}
3181
3182				error_msg(gettext(
3183				    "Instance %s has exceeded its configured "
3184				    "connection rate, additional connections "
3185				    "will not be accepted for %d seconds"),
3186				    instance->fmri, cfg->conn_rate_offline);
3187
3188				close_net_fd(instance, instance->conn_fd);
3189				instance->conn_fd = -1;
3190
3191				destroy_bound_fds(instance);
3192
3193				instance->conn_rate_count = 0;
3194
3195				instance->conn_rate_exceeded = B_TRUE;
3196				(void) run_method(instance, IM_OFFLINE, NULL);
3197
3198				return;
3199			}
3200		}
3201	}
3202
3203	ret = run_method(instance, IM_START, pi);
3204
3205	close_net_fd(instance, instance->conn_fd);
3206	instance->conn_fd = -1;
3207
3208	if (ret == -1) /* the method wasn't forked  */
3209		return;
3210
3211	instance->copies++;
3212
3213	/*
3214	 * Limit concurrent connections of nowait services.
3215	 */
3216	if (copies_limit_exceeded(instance)) {
3217		/* Generate audit record */
3218		if ((ae = adt_alloc_event(audit_handle, ADT_inetd_copylimit))
3219		    == NULL) {
3220			error_msg(gettext("Unable to allocate copy limit "
3221			    "audit event"));
3222		} else {
3223			/*
3224			 * The inetd_copylimit audit record consists of:
3225			 *	Service name
3226			 * 	Copy limit
3227			 */
3228			ae->adt_inetd_copylimit.service_name = cfg->svc_name;
3229			(void) snprintf(buf, sizeof (buf), "limit=%lld",
3230			    cfg->max_copies);
3231			ae->adt_inetd_copylimit.limit = buf;
3232			(void) adt_put_event(ae, ADT_SUCCESS, ADT_SUCCESS);
3233			adt_free_event(ae);
3234		}
3235
3236		warn_msg(gettext("Instance %s has reached its maximum "
3237		    "configured copies, no new connections will be accepted"),
3238		    instance->fmri);
3239		destroy_bound_fds(instance);
3240		(void) run_method(instance, IM_OFFLINE, NULL);
3241	}
3242}
3243
3244/*
3245 * Handle an incoming request for a wait type service.
3246 * Failure rate checking is done first, taking the service to the maintenance
3247 * state if the checks fail. Following this, the service's start method is run,
3248 * and on success, we stop listening for new requests for this service.
3249 */
3250static void
3251process_wait_request(instance_t *instance, const proto_info_t *pi)
3252{
3253	basic_cfg_t		*cfg = instance->config->basic;
3254	int			ret;
3255	adt_event_data_t	*ae;
3256	char			buf[BUFSIZ];
3257
3258	debug_msg("Entering process_wait_request");
3259
3260	instance->conn_fd = pi->listen_fd;
3261
3262	/*
3263	 * Detect broken servers and transition them to maintenance. If a
3264	 * wait type service exits without accepting the connection or
3265	 * consuming (reading) the datagram, that service's descriptor will
3266	 * select readable again, and inetd will fork another instance of
3267	 * the server. If either wait_fail_cnt or wait_fail_interval are <= 0,
3268	 * no failure rate detection is done.
3269	 */
3270	if ((cfg->wait_fail_cnt > 0) && (cfg->wait_fail_interval > 0)) {
3271		if (instance->fail_rate_count++ == 0) {
3272			instance->fail_rate_start = time(NULL);
3273		} else if (instance->fail_rate_count > cfg->wait_fail_cnt) {
3274			time_t now = time(NULL);
3275
3276			if ((now - instance->fail_rate_start) >
3277			    cfg->wait_fail_interval) {
3278				instance->fail_rate_start = now;
3279				instance->fail_rate_count = 1;
3280			} else {
3281				/* Generate audit record */
3282				if ((ae = adt_alloc_event(audit_handle,
3283				    ADT_inetd_failrate)) == NULL) {
3284					error_msg(gettext("Unable to allocate "
3285					    "failure rate audit event"));
3286				} else {
3287					adt_inetd_failrate_t *fr =
3288					    &ae->adt_inetd_failrate;
3289					/*
3290					 * The inetd_failrate audit record
3291					 * consists of:
3292					 * 	Service name
3293					 * 	Failure rate
3294					 *	Interval
3295					 * Last two are expressed as k=v pairs
3296					 * in the values field.
3297					 */
3298					fr->service_name = cfg->svc_name;
3299					(void) snprintf(buf, sizeof (buf),
3300					    "limit=%lld,interval=%d",
3301					    cfg->wait_fail_cnt,
3302					    cfg->wait_fail_interval);
3303					fr->values = buf;
3304					(void) adt_put_event(ae, ADT_SUCCESS,
3305					    ADT_SUCCESS);
3306					adt_free_event(ae);
3307				}
3308
3309				error_msg(gettext(
3310				    "Instance %s has exceeded its configured "
3311				    "failure rate, transitioning to "
3312				    "maintenance"), instance->fmri);
3313				instance->fail_rate_count = 0;
3314
3315				destroy_bound_fds(instance);
3316
3317				instance->maintenance_req = B_TRUE;
3318				(void) run_method(instance, IM_OFFLINE, NULL);
3319				return;
3320			}
3321		}
3322	}
3323
3324	ret = run_method(instance, IM_START, pi);
3325
3326	instance->conn_fd = -1;
3327
3328	if (ret == 0) {
3329		/*
3330		 * Stop listening for connections now we've fired off the
3331		 * server for a wait type instance.
3332		 */
3333		(void) poll_bound_fds(instance, B_FALSE);
3334	}
3335}
3336
3337/*
3338 * Process any networks requests for each proto for each instance.
3339 */
3340void
3341process_network_events(void)
3342{
3343	instance_t	*instance;
3344
3345	debug_msg("Entering process_network_events");
3346
3347	for (instance = uu_list_first(instance_list); instance != NULL;
3348	    instance = uu_list_next(instance_list, instance)) {
3349		basic_cfg_t	*cfg;
3350		proto_info_t	*pi;
3351
3352		/*
3353		 * Ignore instances in states that definitely don't have any
3354		 * listening fds.
3355		 */
3356		switch (instance->cur_istate) {
3357		case IIS_ONLINE:
3358		case IIS_DEGRADED:
3359		case IIS_IN_REFRESH_METHOD:
3360			break;
3361		default:
3362			continue;
3363		}
3364
3365		cfg = instance->config->basic;
3366
3367		for (pi = uu_list_first(cfg->proto_list); pi != NULL;
3368		    pi = uu_list_next(cfg->proto_list, pi)) {
3369			if ((pi->listen_fd != -1) &&
3370			    isset_pollfd(pi->listen_fd)) {
3371				if (cfg->iswait) {
3372					process_wait_request(instance, pi);
3373				} else {
3374					process_nowait_request(instance, pi);
3375				}
3376			}
3377		}
3378	}
3379}
3380
3381/* ARGSUSED0 */
3382static void
3383sigterm_handler(int sig)
3384{
3385	debug_msg("Entering sigterm_handler");
3386
3387	got_sigterm = B_TRUE;
3388}
3389
3390/* ARGSUSED0 */
3391static void
3392sighup_handler(int sig)
3393{
3394	debug_msg("Entering sighup_handler");
3395
3396	refresh_inetd_requested = B_TRUE;
3397}
3398
3399/*
3400 * inetd's major work loop. This function sits in poll waiting for events
3401 * to occur, processing them when they do. The possible events are
3402 * master restarter requests, expired timer queue timers, stop/refresh signal
3403 * requests, contract events indicating process termination, stop/refresh
3404 * requests originating from one of the stop/refresh inetd processes and
3405 * network events.
3406 * The loop is exited when a stop request is received and processed, and
3407 * all the instances have reached a suitable 'stopping' state.
3408 */
3409static void
3410event_loop(void)
3411{
3412	instance_t		*instance;
3413	int			timeout;
3414
3415	debug_msg("Entering event_loop");
3416
3417	for (;;) {
3418		int	pret = -1;
3419
3420		timeout = iu_earliest_timer(timer_queue);
3421
3422		debug_msg("Doing signal check/poll");
3423		if (!got_sigterm && !refresh_inetd_requested) {
3424			pret = poll(poll_fds, num_pollfds, timeout);
3425			if ((pret == -1) && (errno != EINTR)) {
3426				error_msg(gettext("poll failure: %s"),
3427				    strerror(errno));
3428				continue;
3429			}
3430			debug_msg("Exiting poll, returned: %d", pret);
3431		}
3432
3433		if (got_sigterm) {
3434			msg_fini();
3435			inetd_stop();
3436			got_sigterm = B_FALSE;
3437			goto check_if_stopped;
3438		}
3439
3440		/*
3441		 * Process any stop/refresh requests from the Unix Domain
3442		 * Socket.
3443		 */
3444		if ((pret != -1) && isset_pollfd(uds_fd)) {
3445			while (process_uds_event() == 0)
3446				;
3447		}
3448
3449		/*
3450		 * Process refresh request. We do this check after the UDS
3451		 * event check above, as it would be wasted processing if we
3452		 * started refreshing inetd based on a SIGHUP, and then were
3453		 * told to shut-down via a UDS event.
3454		 */
3455		if (refresh_inetd_requested) {
3456			refresh_inetd_requested = B_FALSE;
3457			if (!inetd_stopping)
3458				inetd_refresh();
3459		}
3460
3461		/*
3462		 * We were interrupted by a signal. Don't waste any more
3463		 * time processing a potentially inaccurate poll return.
3464		 */
3465		if (pret == -1)
3466			continue;
3467
3468		/*
3469		 * Process any instance restarter events.
3470		 */
3471		if (isset_pollfd(rst_event_pipe[PE_CONSUMER])) {
3472			while (process_restarter_event() == 0)
3473				;
3474		}
3475
3476		/*
3477		 * Process any expired timers (bind retry, con-rate offline,
3478		 * method timeouts).
3479		 */
3480		(void) iu_expire_timers(timer_queue);
3481
3482		process_terminated_methods();
3483
3484		/*
3485		 * If inetd is stopping, check whether all our managed
3486		 * instances have been stopped and we can return.
3487		 */
3488		if (inetd_stopping) {
3489check_if_stopped:
3490			for (instance = uu_list_first(instance_list);
3491			    instance != NULL;
3492			    instance = uu_list_next(instance_list, instance)) {
3493				if (!instance_stopped(instance)) {
3494					debug_msg("%s not yet stopped",
3495					    instance->fmri);
3496					break;
3497				}
3498			}
3499			/* if all instances are stopped, return */
3500			if (instance == NULL)
3501				return;
3502		}
3503
3504		process_network_events();
3505	}
3506}
3507
3508static void
3509fini(void)
3510{
3511	debug_msg("Entering fini");
3512
3513	method_fini();
3514	uds_fini();
3515	if (timer_queue != NULL)
3516		iu_tq_destroy(timer_queue);
3517
3518
3519	/*
3520	 * We don't bother to undo the restarter interface at all.
3521	 * Because of quirks in the interface, there is no way to
3522	 * disconnect from the channel and cause any new events to be
3523	 * queued.  However, any events which are received and not
3524	 * acknowledged will be re-sent when inetd restarts as long as inetd
3525	 * uses the same subscriber ID, which it does.
3526	 *
3527	 * By keeping the event pipe open but ignoring it, any events which
3528	 * occur will cause restarter_event_proxy to hang without breaking
3529	 * anything.
3530	 */
3531
3532	if (instance_list != NULL) {
3533		void		*cookie = NULL;
3534		instance_t	*inst;
3535
3536		while ((inst = uu_list_teardown(instance_list, &cookie)) !=
3537		    NULL)
3538			destroy_instance(inst);
3539		uu_list_destroy(instance_list);
3540	}
3541	if (instance_pool != NULL)
3542		uu_list_pool_destroy(instance_pool);
3543	tlx_fini();
3544	config_fini();
3545	repval_fini();
3546	poll_fini();
3547
3548	/* Close audit session */
3549	(void) adt_end_session(audit_handle);
3550}
3551
3552static int
3553init(void)
3554{
3555	int err;
3556
3557	debug_msg("Entering init");
3558
3559	if (repval_init() < 0)
3560		goto failed;
3561
3562	if (config_init() < 0)
3563		goto failed;
3564
3565	if (tlx_init() < 0)
3566		goto failed;
3567
3568	/* Setup instance list. */
3569	if ((instance_pool = uu_list_pool_create("instance_pool",
3570	    sizeof (instance_t), offsetof(instance_t, link), NULL,
3571	    UU_LIST_POOL_DEBUG)) == NULL) {
3572		error_msg("%s: %s",
3573		    gettext("Failed to create instance pool"),
3574		    uu_strerror(uu_error()));
3575		goto failed;
3576	}
3577	if ((instance_list = uu_list_create(instance_pool, NULL, 0)) == NULL) {
3578		error_msg("%s: %s",
3579		    gettext("Failed to create instance list"),
3580		    uu_strerror(uu_error()));
3581		goto failed;
3582	}
3583
3584	/*
3585	 * Create event pipe to communicate events with the main event
3586	 * loop and add it to the event loop's fdset.
3587	 */
3588	if (pipe(rst_event_pipe) < 0) {
3589		error_msg("pipe: %s", strerror(errno));
3590		goto failed;
3591	}
3592	/*
3593	 * We only leave the producer end to block on reads/writes as we
3594	 * can't afford to block in the main thread, yet need to in
3595	 * the restarter event thread, so it can sit and wait for an
3596	 * acknowledgement to be written to the pipe.
3597	 */
3598	disable_blocking(rst_event_pipe[PE_CONSUMER]);
3599	if ((set_pollfd(rst_event_pipe[PE_CONSUMER], POLLIN)) == -1)
3600		goto failed;
3601
3602	/*
3603	 * Register with master restarter for managed service events. This
3604	 * will fail, amongst other reasons, if inetd is already running.
3605	 */
3606	if ((err = restarter_bind_handle(RESTARTER_EVENT_VERSION,
3607	    INETD_INSTANCE_FMRI, restarter_event_proxy, 0,
3608	    &rst_event_handle)) != 0) {
3609		error_msg(gettext(
3610		    "Failed to register for restarter events: %s"),
3611		    strerror(err));
3612		goto failed;
3613	}
3614
3615	if (contract_init() < 0)
3616		goto failed;
3617
3618	if ((timer_queue = iu_tq_create()) == NULL) {
3619		error_msg(gettext("Failed to create timer queue."));
3620		goto failed;
3621	}
3622
3623	if (uds_init() < 0)
3624		goto failed;
3625
3626	if (method_init() < 0)
3627		goto failed;
3628
3629	/* Initialize auditing session */
3630	if (adt_start_session(&audit_handle, NULL, ADT_USE_PROC_DATA) != 0) {
3631		error_msg(gettext("Unable to start audit session"));
3632	}
3633
3634	/*
3635	 * Initialize signal dispositions/masks
3636	 */
3637	(void) sigset(SIGHUP, sighup_handler);
3638	(void) sigset(SIGTERM, sigterm_handler);
3639	(void) sigignore(SIGINT);
3640
3641	return (0);
3642
3643failed:
3644	fini();
3645	return (-1);
3646}
3647
3648static int
3649start_method(void)
3650{
3651	int	i;
3652	int	pipe_fds[2];
3653	int	child;
3654
3655	debug_msg("ENTERING START_METHOD:");
3656
3657	/* Create pipe for child to notify parent of initialization success. */
3658	if (pipe(pipe_fds) < 0) {
3659		debug_msg("pipe: %s", strerror(errno));
3660		return (SMF_EXIT_ERR_OTHER);
3661	}
3662
3663	if ((child = fork()) == -1) {
3664		debug_msg("fork: %s", strerror(errno));
3665		(void) close(pipe_fds[PE_CONSUMER]);
3666		(void) close(pipe_fds[PE_PRODUCER]);
3667		return (SMF_EXIT_ERR_OTHER);
3668	} else if (child > 0) {			/* parent */
3669
3670		/* Wait on child to return success of initialization. */
3671		(void) close(pipe_fds[PE_PRODUCER]);
3672		if ((safe_read(pipe_fds[PE_CONSUMER], &i, sizeof (i)) != 0) ||
3673		    (i < 0)) {
3674			error_msg(gettext(
3675			    "Initialization failed, unable to start"));
3676			(void) close(pipe_fds[PE_CONSUMER]);
3677			/*
3678			 * Batch all initialization errors as 'other' errors,
3679			 * resulting in retries being attempted.
3680			 */
3681			return (SMF_EXIT_ERR_OTHER);
3682		} else {
3683			(void) close(pipe_fds[PE_CONSUMER]);
3684			return (SMF_EXIT_OK);
3685		}
3686	} else {				/* child */
3687		/*
3688		 * Perform initialization and return success code down
3689		 * the pipe.
3690		 */
3691		(void) close(pipe_fds[PE_CONSUMER]);
3692		i = init();
3693		if ((safe_write(pipe_fds[PE_PRODUCER], &i, sizeof (i)) < 0) ||
3694		    (i < 0)) {
3695			error_msg(gettext("pipe write failure: %s"),
3696			    strerror(errno));
3697			exit(1);
3698		}
3699		(void) close(pipe_fds[PE_PRODUCER]);
3700
3701		(void) setsid();
3702
3703		/*
3704		 * Log a message if the configuration file has changed since
3705		 * inetconv was last run.
3706		 */
3707		check_conf_file();
3708
3709		event_loop();
3710
3711		fini();
3712		debug_msg("inetd stopped");
3713		msg_fini();
3714		exit(0);
3715	}
3716	/* NOTREACHED */
3717}
3718
3719/*
3720 * When inetd is run from outside the SMF, this message is output to provide
3721 * the person invoking inetd with further information that will help them
3722 * understand how to start and stop inetd, and to achieve the other
3723 * behaviors achievable with the legacy inetd command line interface, if
3724 * it is possible.
3725 */
3726static void
3727legacy_usage(void)
3728{
3729	(void) fprintf(stderr,
3730	    "inetd is now an smf(5) managed service and can no longer be run "
3731	    "from the\n"
3732	    "command line. To enable or disable inetd refer to svcadm(1M) on\n"
3733	    "how to enable \"%s\", the inetd instance.\n"
3734	    "\n"
3735	    "The traditional inetd command line option mappings are:\n"
3736	    "\t-d : there is no supported debug output\n"
3737	    "\t-s : inetd is only runnable from within the SMF\n"
3738	    "\t-t : See inetadm(1M) on how to enable TCP tracing\n"
3739	    "\t-r : See inetadm(1M) on how to set a failure rate\n"
3740	    "\n"
3741	    "To specify an alternative configuration file see svccfg(1M)\n"
3742	    "for how to modify the \"%s/%s\" string type property of\n"
3743	    "the inetd instance, and modify it according to the syntax:\n"
3744	    "\"%s [alt_config_file] %%m\".\n"
3745	    "\n"
3746	    "For further information on inetd see inetd(1M).\n",
3747	    INETD_INSTANCE_FMRI, START_METHOD_ARG, SCF_PROPERTY_EXEC,
3748	    INETD_PATH);
3749}
3750
3751/*
3752 * Usage message printed out for usage errors when running under the SMF.
3753 */
3754static void
3755smf_usage(const char *arg0)
3756{
3757	error_msg("Usage: %s [alt_conf_file] %s|%s|%s", arg0, START_METHOD_ARG,
3758	    STOP_METHOD_ARG, REFRESH_METHOD_ARG);
3759}
3760
3761/*
3762 * Returns B_TRUE if we're being run from within the SMF, else B_FALSE.
3763 */
3764static boolean_t
3765run_through_smf(void)
3766{
3767	char *fmri;
3768
3769	/*
3770	 * check if the instance fmri environment variable has been set by
3771	 * our restarter.
3772	 */
3773	return (((fmri = getenv("SMF_FMRI")) != NULL) &&
3774	    (strcmp(fmri, INETD_INSTANCE_FMRI) == 0));
3775}
3776
3777int
3778main(int argc, char *argv[])
3779{
3780	char		*method;
3781	int		ret;
3782
3783#if	!defined(TEXT_DOMAIN)
3784#define	TEXT_DOMAIN "SYS_TEST"
3785#endif
3786	(void) textdomain(TEXT_DOMAIN);
3787	(void) setlocale(LC_ALL, "");
3788
3789	if (!run_through_smf()) {
3790		legacy_usage();
3791		return (SMF_EXIT_ERR_NOSMF);
3792	}
3793
3794	msg_init();	/* setup logging */
3795
3796	(void) enable_extended_FILE_stdio(-1, -1);
3797
3798	/* inetd invocation syntax is inetd [alt_conf_file] method_name */
3799
3800	switch (argc) {
3801	case 2:
3802		method = argv[1];
3803		break;
3804	case 3:
3805		conf_file = argv[1];
3806		method = argv[2];
3807		break;
3808	default:
3809		smf_usage(argv[0]);
3810		return (SMF_EXIT_ERR_CONFIG);
3811
3812	}
3813
3814	if (strcmp(method, START_METHOD_ARG) == 0) {
3815		ret = start_method();
3816	} else if (strcmp(method, STOP_METHOD_ARG) == 0) {
3817		ret = stop_method();
3818	} else if (strcmp(method, REFRESH_METHOD_ARG) == 0) {
3819		ret = refresh_method();
3820	} else {
3821		smf_usage(argv[0]);
3822		return (SMF_EXIT_ERR_CONFIG);
3823	}
3824
3825	return (ret);
3826}
3827