1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * This file contains a set of routines used to perform wait based method
28 * reaping.
29 */
30
31#include <wait.h>
32#include <sys/param.h>
33#include <fcntl.h>
34#include <libcontract.h>
35#include <errno.h>
36#include <libintl.h>
37#include <unistd.h>
38#include <stdlib.h>
39#include <string.h>
40#include <sys/resource.h>
41#include "inetd_impl.h"
42
/*
 * Open file limit inetd imposes on itself; method_init() installs it as both
 * the soft and the hard RLIMIT_NOFILE value.
 */
#define	INETD_NOFILE_LIMIT RLIM_INFINITY
45
/*
 * Structure used to represent an active method process. One element is
 * allocated per monitored process by register_method() and released by
 * unregister_method().
 */
typedef struct {
	int			fd;	/* open fd of process's /proc psinfo file */
	/* associated contract id if known, else -1 */
	ctid_t			cid;
	/* process id of the method; reaped with waitpid() on termination */
	pid_t			pid;
	instance_t		*inst;	/* pointer to associated instance */
	instance_method_t	method;	/* the method type running */
	/*
	 * associated endpoint protocol name if known, else NULL; this is a
	 * private strdup()'d copy that is freed along with the element
	 */
	char			*proto_name;
	/* linkage used to place the element on method_list */
	uu_list_node_t		link;
} method_el_t;
58
59
/* forward declaration; used by method_fini() and method_timeout() below */
static void unregister_method(method_el_t *);


/*
 * List of currently executing method processes, and the pool its nodes are
 * drawn from. Both are created in method_init() and torn down in
 * method_fini(); they are NULL outside of that window.
 */
static uu_list_pool_t		*method_pool = NULL;
static uu_list_t		*method_list = NULL;

/*
 * File limit saved during initialization before modification, so that it can
 * be reverted back to for inetd's exec'd methods (see method_preexec()).
 */
static struct rlimit		saved_file_limit;
72
73/*
74 * Setup structures used for method termination monitoring.
75 * Returns -1 if an allocation failure occurred, else 0.
76 */
77int
78method_init(void)
79{
80	struct rlimit rl;
81
82	/*
83	 * Save aside the old file limit and impose one large enough to support
84	 * all the /proc file handles we could have open.
85	 */
86
87	(void) getrlimit(RLIMIT_NOFILE, &saved_file_limit);
88
89	rl.rlim_cur = rl.rlim_max = INETD_NOFILE_LIMIT;
90	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
91		error_msg("Failed to set file limit: %s", strerror(errno));
92		return (-1);
93	}
94
95	if ((method_pool = uu_list_pool_create("method_pool",
96	    sizeof (method_el_t), offsetof(method_el_t, link), NULL,
97	    UU_LIST_POOL_DEBUG)) == NULL) {
98		error_msg("%s: %s", gettext("Failed to create method pool"),
99		    uu_strerror(uu_error()));
100		return (-1);
101	}
102
103	if ((method_list = uu_list_create(method_pool, NULL, 0)) == NULL) {
104		error_msg("%s: %s",
105		    gettext("Failed to create method list"),
106		    uu_strerror(uu_error()));
107		/* let method_fini() clean-up */
108		return (-1);
109	}
110
111	return (0);
112}
113
114/*
115 * Tear-down structures created in method_init().
116 */
117void
118method_fini(void)
119{
120	if (method_list != NULL) {
121		method_el_t *me;
122
123		while ((me = uu_list_first(method_list)) != NULL)
124			unregister_method(me);
125
126		(void) uu_list_destroy(method_list);
127		method_list = NULL;
128	}
129	if (method_pool != NULL) {
130		(void) uu_list_pool_destroy(method_pool);
131		method_pool = NULL;
132	}
133
134	/* revert file limit */
135	method_preexec();
136}
137
138/*
139 * Revert file limit back to pre-initialization one. This shouldn't fail as
140 * long as its called *after* descriptor cleanup.
141 */
142void
143method_preexec(void)
144{
145	(void) setrlimit(RLIMIT_NOFILE, &saved_file_limit);
146}
147
148
149/*
150 * Callback function that handles the timeout of an instance's method.
151 * 'arg' points at the method_el_t representing the method.
152 */
153/* ARGSUSED0 */
154static void
155method_timeout(iu_tq_t *tq, void *arg)
156{
157	method_el_t *mp = arg;
158
159	error_msg(gettext("The %s method of instance %s timed-out"),
160	    methods[mp->method].name, mp->inst->fmri);
161
162	mp->inst->timer_id = -1;
163
164	if (mp->method == IM_START) {
165		process_start_term(mp->inst, mp->proto_name);
166	} else {
167		process_non_start_term(mp->inst, IMRET_FAILURE);
168	}
169
170	unregister_method(mp);
171}
172
173/*
174 * Registers the attributes of a running method passed as arguments so that
175 * the method's termination is noticed and any further processing of the
176 * associated instance is carried out. The function also sets up any
177 * necessary timers so we can detect hung methods.
178 * Returns -1 if either it failed to open the /proc psinfo file which is used
179 * to monitor the method process, it failed to setup a required timer or
180 * memory allocation failed; else 0.
181 */
182int
183register_method(instance_t *ins, pid_t pid, ctid_t cid, instance_method_t mthd,
184    char *proto_name)
185{
186	char		path[MAXPATHLEN];
187	int		fd;
188	method_el_t	*me;
189
190	/* open /proc psinfo file of process to listen for POLLHUP events on */
191	(void) snprintf(path, sizeof (path), "/proc/%u/psinfo", pid);
192	for (;;) {
193		if ((fd = open(path, O_RDONLY)) >= 0) {
194			break;
195		} else if (errno != EINTR) {
196			/*
197			 * Don't output an error for ENOENT; we get this
198			 * if a method has gone away whilst we were stopped,
199			 * and we're now trying to re-listen for it.
200			 */
201			if (errno != ENOENT) {
202				error_msg(gettext("Failed to open %s: %s"),
203				    path, strerror(errno));
204			}
205			return (-1);
206		}
207	}
208
209	/* add method record to in-memory list */
210	if ((me = calloc(1, sizeof (method_el_t))) == NULL) {
211		error_msg(strerror(errno));
212		(void) close(fd);
213		return (-1);
214	}
215	me->fd = fd;
216	me->inst = (instance_t *)ins;
217	me->method = mthd;
218	me->pid = pid;
219	me->cid = cid;
220	if (proto_name != NULL) {
221		if ((me->proto_name = strdup(proto_name)) == NULL) {
222			error_msg(strerror(errno));
223			free(me);
224			(void) close(fd);
225			return (-1);
226		}
227	} else
228		me->proto_name = NULL;
229
230	/* register a timeout for the method, if required */
231	if (mthd != IM_START) {
232		method_info_t *mi = ins->config->methods[mthd];
233
234		if (mi->timeout > 0) {
235			assert(ins->timer_id == -1);
236			ins->timer_id = iu_schedule_timer(timer_queue,
237			    mi->timeout, method_timeout, me);
238			if (ins->timer_id == -1) {
239				error_msg(gettext(
240				    "Failed to schedule method timeout"));
241				if (me->proto_name != NULL)
242					free(me->proto_name);
243				free(me);
244				(void) close(fd);
245				return (-1);
246			}
247		}
248	}
249
250	/*
251	 * Add fd of psinfo file to poll set, but pass 0 for events to
252	 * poll for, so we should only get a POLLHUP event on the fd.
253	 */
254	if (set_pollfd(fd, 0) == -1) {
255		cancel_inst_timer(ins);
256		if (me->proto_name != NULL)
257			free(me->proto_name);
258		free(me);
259		(void) close(fd);
260		return (-1);
261	}
262
263	uu_list_node_init(me, &me->link, method_pool);
264	(void) uu_list_insert_after(method_list, NULL, me);
265
266	return (0);
267}
268
269/*
270 * A counterpart to register_method(), this function stops the monitoring of a
271 * method process for its termination.
272 */
273static void
274unregister_method(method_el_t *me)
275{
276	/* cancel any timer associated with the method */
277	if (me->inst->timer_id != -1)
278		cancel_inst_timer(me->inst);
279
280	/* stop polling on the psinfo file fd */
281	clear_pollfd(me->fd);
282	(void) close(me->fd);
283
284	/* remove method record from list */
285	uu_list_remove(method_list, me);
286
287	if (me->proto_name != NULL)
288		free(me->proto_name);
289	free(me);
290}
291
292/*
293 * Unregister all methods associated with instance 'inst'.
294 */
295void
296unregister_instance_methods(const instance_t *inst)
297{
298	method_el_t *me = uu_list_first(method_list);
299
300	while (me != NULL) {
301		if (me->inst == inst) {
302			method_el_t *tmp = me;
303
304			me = uu_list_next(method_list, me);
305			unregister_method(tmp);
306		} else  {
307			me = uu_list_next(method_list, me);
308		}
309	}
310}
311
312/*
313 * Process any terminated methods. For each method determined to have
314 * terminated, the function determines its return value and calls the
315 * appropriate handling function, depending on the type of the method.
316 */
317void
318process_terminated_methods(void)
319{
320	method_el_t	*me = uu_list_first(method_list);
321
322	while (me != NULL) {
323		struct pollfd	*pfd;
324		pid_t		pid;
325		int		status;
326		int		ret;
327		method_el_t	*tmp;
328
329		pfd = find_pollfd(me->fd);
330
331		/*
332		 * We expect to get a POLLHUP back on the fd of the process's
333		 * open psinfo file from /proc when the method terminates.
334		 * A POLLERR could(?) mask a POLLHUP, so handle this
335		 * also.
336		 */
337		if ((pfd->revents & (POLLHUP|POLLERR)) == 0) {
338			me = uu_list_next(method_list, me);
339			continue;
340		}
341
342		/* get the method's exit code (no need to loop for EINTR) */
343		pid = waitpid(me->pid, &status, WNOHANG);
344
345		switch (pid) {
346		case 0:					/* child still around */
347			/*
348			 * Either poll() is sending us invalid POLLHUP events
349			 * or is flagging a POLLERR on the fd. Neither should
350			 * happen, but in the event they do, ignore this fd
351			 * this time around and wait out the termination
352			 * of its associated method. This may result in
353			 * inetd swiftly looping in event_loop(), but means
354			 * we don't miss the termination of a method.
355			 */
356			me = uu_list_next(method_list, me);
357			continue;
358
359		case -1:				/* non-existent child */
360			assert(errno == ECHILD);
361			/*
362			 * the method must not be owned by inetd due to it
363			 * persisting over an inetd restart. Let's assume the
364			 * best, that it was successful.
365			 */
366			ret = IMRET_SUCCESS;
367			break;
368
369		default:				/* child terminated */
370			if (WIFEXITED(status)) {
371				ret = WEXITSTATUS(status);
372				debug_msg("process %ld of instance %s returned "
373				    "%d", pid, me->inst->fmri, ret);
374			} else if (WIFSIGNALED(status)) {
375				/*
376				 * Terminated by signal.  This may be due
377				 * to a kill that we sent from a disable or
378				 * offline event. We flag it as a failure, but
379				 * this flagged failure will only be processed
380				 * in the case of non-start methods, or when
381				 * the instance is still enabled.
382				 */
383				debug_msg("process %ld of instance %s exited "
384				    "due to signal %d", pid, me->inst->fmri,
385				    WTERMSIG(status));
386				ret = IMRET_FAILURE;
387			} else {
388				/*
389				 * Can we actually get here?  Don't think so.
390				 * Treat it as a failure, anyway.
391				 */
392				debug_msg("waitpid() for %s method of "
393				    "instance %s returned %d",
394				    methods[me->method].name, me->inst->fmri,
395				    status);
396				ret = IMRET_FAILURE;
397			}
398		}
399
400		remove_method_ids(me->inst, me->pid, me->cid, me->method);
401
402		/* continue state transition processing of the instance */
403		if (me->method != IM_START) {
404			process_non_start_term(me->inst, ret);
405		} else {
406			process_start_term(me->inst, me->proto_name);
407		}
408
409		if (me->cid != -1)
410			(void) abandon_contract(me->cid);
411
412		tmp = me;
413		me = uu_list_next(method_list, me);
414		unregister_method(tmp);
415	}
416}
417