hastd.c revision 217729
1199536Smr/*-
2199536Smr * Copyright (c) 2009-2010 The FreeBSD Foundation
3199536Smr * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4199536Smr * All rights reserved.
5199536Smr *
6199536Smr * This software was developed by Pawel Jakub Dawidek under sponsorship from
7199536Smr * the FreeBSD Foundation.
8199536Smr *
9199536Smr * Redistribution and use in source and binary forms, with or without
10199536Smr * modification, are permitted provided that the following conditions
11199536Smr * are met:
12199536Smr * 1. Redistributions of source code must retain the above copyright
13199536Smr *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 217729 2011-01-22 22:31:55Z pjd $");
33
34#include <sys/param.h>
35#include <sys/linker.h>
36#include <sys/module.h>
37#include <sys/wait.h>
38
39#include <assert.h>
40#include <err.h>
41#include <errno.h>
42#include <libutil.h>
43#include <signal.h>
44#include <stdbool.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <sysexits.h>
49#include <unistd.h>
50
51#include <activemap.h>
52#include <pjdlog.h>
53
54#include "control.h"
55#include "event.h"
56#include "hast.h"
57#include "hast_proto.h"
58#include "hastd.h"
59#include "hooks.h"
60#include "subr.h"
61
62/* Path to configuration file. */
63const char *cfgpath = HAST_CONFIG;
64/* Hastd configuration. */
65static struct hastd_config *cfg;
66/* Was SIGINT or SIGTERM signal received? */
67bool sigexit_received = false;
68/* PID file handle. */
69struct pidfh *pfh;
70
71/* How often check for hooks running for too long. */
72#define	REPORT_INTERVAL	5
73
/*
 * Report the accepted command-line options and terminate the process
 * with the EX_USAGE exit code.
 */
static void
usage(void)
{

	errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]");
}
80
/*
 * Make sure the g_gate kernel module is available, loading geom_gate if
 * it is not.  Exits with EX_OSERR when the module cannot be loaded,
 * unless the failure was EEXIST.
 */
static void
g_gate_load(void)
{

	/* Nothing to do when the module is already in the kernel. */
	if (modfind("g_gate") != -1)
		return;

	/* Try loading it and verify that it shows up afterwards. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;

	/* EEXIST is tolerated; anything else is fatal. */
	if (errno == EEXIST)
		return;

	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
95
/*
 * Log how a worker process terminated.
 *
 * pid    - PID of the worker, used only in the log message.
 * status - status word as filled in by wait3(2)/waitpid(2).
 *
 * A zero exit code is logged at debug level 1; termination by signal and
 * any other exit are logged as errors.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	/* -1 stands for "did not exit normally". */
	exitcode = WIFEXITED(status) ? WEXITSTATUS(status) : -1;

	if (exitcode == 0) {
		pjdlog_debug(1, "Worker process exited gracefully (pid=%u).",
		    pid);
		return;
	}
	if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	}
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
111
112static void
113child_exit(void)
114{
115	struct hast_resource *res;
116	int status;
117	pid_t pid;
118
119	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
120		/* Find resource related to the process that just exited. */
121		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
122			if (pid == res->hr_workerpid)
123				break;
124		}
125		if (res == NULL) {
126			/*
127			 * This can happen when new connection arrives and we
128			 * cancel child responsible for the old one or if this
129			 * was hook which we executed.
130			 */
131			hook_check_one(pid, status);
132			continue;
133		}
134		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
135		    role2str(res->hr_role));
136		child_exit_log(pid, status);
137		child_cleanup(res);
138		if (res->hr_role == HAST_ROLE_PRIMARY) {
139			/*
140			 * Restart child process if it was killed by signal
141			 * or exited because of temporary problem.
142			 */
143			if (WIFSIGNALED(status) ||
144			    (WIFEXITED(status) &&
145			     WEXITSTATUS(status) == EX_TEMPFAIL)) {
146				sleep(1);
147				pjdlog_info("Restarting worker process.");
148				hastd_primary(res);
149			} else {
150				res->hr_role = HAST_ROLE_INIT;
151				pjdlog_info("Changing resource role back to %s.",
152				    role2str(res->hr_role));
153			}
154		}
155		pjdlog_prefix_set("%s", "");
156	}
157}
158
159static bool
160resource_needs_restart(const struct hast_resource *res0,
161    const struct hast_resource *res1)
162{
163
164	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
165
166	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
167		return (true);
168	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
169		return (true);
170	if (res0->hr_role == HAST_ROLE_INIT ||
171	    res0->hr_role == HAST_ROLE_SECONDARY) {
172		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
173			return (true);
174		if (res0->hr_replication != res1->hr_replication)
175			return (true);
176		if (res0->hr_timeout != res1->hr_timeout)
177			return (true);
178		if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
179			return (true);
180	}
181	return (false);
182}
183
184static bool
185resource_needs_reload(const struct hast_resource *res0,
186    const struct hast_resource *res1)
187{
188
189	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
190	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
191	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
192
193	if (res0->hr_role != HAST_ROLE_PRIMARY)
194		return (false);
195
196	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
197		return (true);
198	if (res0->hr_replication != res1->hr_replication)
199		return (true);
200	if (res0->hr_timeout != res1->hr_timeout)
201		return (true);
202	if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
203		return (true);
204	return (false);
205}
206
207static void
208hastd_reload(void)
209{
210	struct hastd_config *newcfg;
211	struct hast_resource *nres, *cres, *tres;
212	uint8_t role;
213
214	pjdlog_info("Reloading configuration...");
215
216	newcfg = yy_config_parse(cfgpath, false);
217	if (newcfg == NULL)
218		goto failed;
219
220	/*
221	 * Check if control address has changed.
222	 */
223	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
224		if (proto_server(newcfg->hc_controladdr,
225		    &newcfg->hc_controlconn) < 0) {
226			pjdlog_errno(LOG_ERR,
227			    "Unable to listen on control address %s",
228			    newcfg->hc_controladdr);
229			goto failed;
230		}
231	}
232	/*
233	 * Check if listen address has changed.
234	 */
235	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
236		if (proto_server(newcfg->hc_listenaddr,
237		    &newcfg->hc_listenconn) < 0) {
238			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
239			    newcfg->hc_listenaddr);
240			goto failed;
241		}
242	}
243	/*
244	 * Only when both control and listen sockets are successfully
245	 * initialized switch them to new configuration.
246	 */
247	if (newcfg->hc_controlconn != NULL) {
248		pjdlog_info("Control socket changed from %s to %s.",
249		    cfg->hc_controladdr, newcfg->hc_controladdr);
250		proto_close(cfg->hc_controlconn);
251		cfg->hc_controlconn = newcfg->hc_controlconn;
252		newcfg->hc_controlconn = NULL;
253		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
254		    sizeof(cfg->hc_controladdr));
255	}
256	if (newcfg->hc_listenconn != NULL) {
257		pjdlog_info("Listen socket changed from %s to %s.",
258		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
259		proto_close(cfg->hc_listenconn);
260		cfg->hc_listenconn = newcfg->hc_listenconn;
261		newcfg->hc_listenconn = NULL;
262		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
263		    sizeof(cfg->hc_listenaddr));
264	}
265
266	/*
267	 * Stop and remove resources that were removed from the configuration.
268	 */
269	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
270		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
271			if (strcmp(cres->hr_name, nres->hr_name) == 0)
272				break;
273		}
274		if (nres == NULL) {
275			control_set_role(cres, HAST_ROLE_INIT);
276			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
277			pjdlog_info("Resource %s removed.", cres->hr_name);
278			free(cres);
279		}
280	}
281	/*
282	 * Move new resources to the current configuration.
283	 */
284	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
285		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
286			if (strcmp(cres->hr_name, nres->hr_name) == 0)
287				break;
288		}
289		if (cres == NULL) {
290			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
291			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
292			pjdlog_info("Resource %s added.", nres->hr_name);
293		}
294	}
295	/*
296	 * Deal with modified resources.
297	 * Depending on what has changed exactly we might want to perform
298	 * different actions.
299	 *
300	 * We do full resource restart in the following situations:
301	 * Resource role is INIT or SECONDARY.
302	 * Resource role is PRIMARY and path to local component or provider
303	 * name has changed.
304	 * In case of PRIMARY, the worker process will be killed and restarted,
305	 * which also means removing /dev/hast/<name> provider and
306	 * recreating it.
307	 *
308	 * We do just reload (send SIGHUP to worker process) if we act as
309	 * PRIMARY, but only if remote address, replication mode, timeout or
310	 * execution path has changed. For those, there is no need to restart
311	 * worker process.
312	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
313	 * replication mode has changed or simply set new timeout if only
314	 * timeout has changed.
315	 */
316	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
317		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
318			if (strcmp(cres->hr_name, nres->hr_name) == 0)
319				break;
320		}
321		assert(cres != NULL);
322		if (resource_needs_restart(cres, nres)) {
323			pjdlog_info("Resource %s configuration was modified, restarting it.",
324			    cres->hr_name);
325			role = cres->hr_role;
326			control_set_role(cres, HAST_ROLE_INIT);
327			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
328			free(cres);
329			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
330			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
331			control_set_role(nres, role);
332		} else if (resource_needs_reload(cres, nres)) {
333			pjdlog_info("Resource %s configuration was modified, reloading it.",
334			    cres->hr_name);
335			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
336			    sizeof(cres->hr_remoteaddr));
337			cres->hr_replication = nres->hr_replication;
338			cres->hr_timeout = nres->hr_timeout;
339			strlcpy(cres->hr_exec, nres->hr_exec,
340			    sizeof(cres->hr_exec));
341			if (cres->hr_workerpid != 0) {
342				if (kill(cres->hr_workerpid, SIGHUP) < 0) {
343					pjdlog_errno(LOG_WARNING,
344					    "Unable to send SIGHUP to worker process %u",
345					    (unsigned int)cres->hr_workerpid);
346				}
347			}
348		}
349	}
350
351	yy_config_free(newcfg);
352	pjdlog_info("Configuration reloaded successfully.");
353	return;
354failed:
355	if (newcfg != NULL) {
356		if (newcfg->hc_controlconn != NULL)
357			proto_close(newcfg->hc_controlconn);
358		if (newcfg->hc_listenconn != NULL)
359			proto_close(newcfg->hc_listenconn);
360		yy_config_free(newcfg);
361	}
362	pjdlog_warning("Configuration not reloaded.");
363}
364
365static void
366terminate_workers(void)
367{
368	struct hast_resource *res;
369
370	pjdlog_info("Termination signal received, exiting.");
371	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
372		if (res->hr_workerpid == 0)
373			continue;
374		pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).",
375		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
376		if (kill(res->hr_workerpid, SIGTERM) == 0)
377			continue;
378		pjdlog_errno(LOG_WARNING,
379		    "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).",
380		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
381	}
382}
383
384static void
385listen_accept(void)
386{
387	struct hast_resource *res;
388	struct proto_conn *conn;
389	struct nv *nvin, *nvout, *nverr;
390	const char *resname;
391	const unsigned char *token;
392	char laddr[256], raddr[256];
393	size_t size;
394	pid_t pid;
395	int status;
396
397	proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
398	pjdlog_debug(1, "Accepting connection to %s.", laddr);
399
400	if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
401		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
402		return;
403	}
404
405	proto_local_address(conn, laddr, sizeof(laddr));
406	proto_remote_address(conn, raddr, sizeof(raddr));
407	pjdlog_info("Connection from %s to %s.", raddr, laddr);
408
409	/* Error in setting timeout is not critical, but why should it fail? */
410	if (proto_timeout(conn, HAST_TIMEOUT) < 0)
411		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
412
413	nvin = nvout = nverr = NULL;
414
415	/*
416	 * Before receiving any data see if remote host have access to any
417	 * resource.
418	 */
419	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
420		if (proto_address_match(conn, res->hr_remoteaddr))
421			break;
422	}
423	if (res == NULL) {
424		pjdlog_error("Client %s isn't known.", raddr);
425		goto close;
426	}
427	/* Ok, remote host can access at least one resource. */
428
429	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
430		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
431		    raddr);
432		goto close;
433	}
434
435	resname = nv_get_string(nvin, "resource");
436	if (resname == NULL) {
437		pjdlog_error("No 'resource' field in the header received from %s.",
438		    raddr);
439		goto close;
440	}
441	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
442	token = nv_get_uint8_array(nvin, &size, "token");
443	/*
444	 * NULL token means that this is first conection.
445	 */
446	if (token != NULL && size != sizeof(res->hr_token)) {
447		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
448		    raddr, sizeof(res->hr_token), size);
449		goto close;
450	}
451
452	/*
453	 * From now on we want to send errors to the remote node.
454	 */
455	nverr = nv_alloc();
456
457	/* Find resource related to this connection. */
458	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
459		if (strcmp(resname, res->hr_name) == 0)
460			break;
461	}
462	/* Have we found the resource? */
463	if (res == NULL) {
464		pjdlog_error("No resource '%s' as requested by %s.",
465		    resname, raddr);
466		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
467		goto fail;
468	}
469
470	/* Now that we know resource name setup log prefix. */
471	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
472
473	/* Does the remote host have access to this resource? */
474	if (!proto_address_match(conn, res->hr_remoteaddr)) {
475		pjdlog_error("Client %s has no access to the resource.", raddr);
476		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
477		goto fail;
478	}
479	/* Is the resource marked as secondary? */
480	if (res->hr_role != HAST_ROLE_SECONDARY) {
481		pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
482		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
483		    raddr);
484		nv_add_stringf(nverr, "errmsg",
485		    "Remote node acts as %s for the resource and not as %s.",
486		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
487		goto fail;
488	}
489	/* Does token (if exists) match? */
490	if (token != NULL && memcmp(token, res->hr_token,
491	    sizeof(res->hr_token)) != 0) {
492		pjdlog_error("Token received from %s doesn't match.", raddr);
493		nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
494		goto fail;
495	}
496	/*
497	 * If there is no token, but we have half-open connection
498	 * (only remotein) or full connection (worker process is running)
499	 * we have to cancel those and accept the new connection.
500	 */
501	if (token == NULL) {
502		assert(res->hr_remoteout == NULL);
503		pjdlog_debug(1, "Initial connection from %s.", raddr);
504		if (res->hr_workerpid != 0) {
505			assert(res->hr_remotein == NULL);
506			pjdlog_debug(1,
507			    "Worker process exists (pid=%u), stopping it.",
508			    (unsigned int)res->hr_workerpid);
509			/* Stop child process. */
510			if (kill(res->hr_workerpid, SIGINT) < 0) {
511				pjdlog_errno(LOG_ERR,
512				    "Unable to stop worker process (pid=%u)",
513				    (unsigned int)res->hr_workerpid);
514				/*
515				 * Other than logging the problem we
516				 * ignore it - nothing smart to do.
517				 */
518			}
519			/* Wait for it to exit. */
520			else if ((pid = waitpid(res->hr_workerpid,
521			    &status, 0)) != res->hr_workerpid) {
522				/* We can only log the problem. */
523				pjdlog_errno(LOG_ERR,
524				    "Waiting for worker process (pid=%u) failed",
525				    (unsigned int)res->hr_workerpid);
526			} else {
527				child_exit_log(res->hr_workerpid, status);
528			}
529			child_cleanup(res);
530		} else if (res->hr_remotein != NULL) {
531			char oaddr[256];
532
533			proto_remote_address(res->hr_remotein, oaddr,
534			    sizeof(oaddr));
535			pjdlog_debug(1,
536			    "Canceling half-open connection from %s on connection from %s.",
537			    oaddr, raddr);
538			proto_close(res->hr_remotein);
539			res->hr_remotein = NULL;
540		}
541	}
542
543	/*
544	 * Checks and cleanups are done.
545	 */
546
547	if (token == NULL) {
548		arc4random_buf(res->hr_token, sizeof(res->hr_token));
549		nvout = nv_alloc();
550		nv_add_uint8_array(nvout, res->hr_token,
551		    sizeof(res->hr_token), "token");
552		if (nv_error(nvout) != 0) {
553			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
554			    "Unable to prepare return header for %s", raddr);
555			nv_add_stringf(nverr, "errmsg",
556			    "Remote node was unable to prepare return header: %s.",
557			    strerror(nv_error(nvout)));
558			goto fail;
559		}
560		if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
561			int error = errno;
562
563			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
564			    raddr);
565			nv_add_stringf(nverr, "errmsg",
566			    "Remote node was unable to send response: %s.",
567			    strerror(error));
568			goto fail;
569		}
570		res->hr_remotein = conn;
571		pjdlog_debug(1, "Incoming connection from %s configured.",
572		    raddr);
573	} else {
574		res->hr_remoteout = conn;
575		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
576		hastd_secondary(res, nvin);
577	}
578	nv_free(nvin);
579	nv_free(nvout);
580	nv_free(nverr);
581	pjdlog_prefix_set("%s", "");
582	return;
583fail:
584	if (nv_error(nverr) != 0) {
585		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
586		    "Unable to prepare error header for %s", raddr);
587		goto close;
588	}
589	if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
590		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
591		goto close;
592	}
593close:
594	if (nvin != NULL)
595		nv_free(nvin);
596	if (nvout != NULL)
597		nv_free(nvout);
598	if (nverr != NULL)
599		nv_free(nverr);
600	proto_close(conn);
601	pjdlog_prefix_set("%s", "");
602}
603
604static void
605main_loop(void)
606{
607	struct hast_resource *res;
608	struct timeval seltimeout;
609	struct timespec sigtimeout;
610	int fd, maxfd, ret, signo;
611	sigset_t mask;
612	fd_set rfds;
613
614	seltimeout.tv_sec = REPORT_INTERVAL;
615	seltimeout.tv_usec = 0;
616	sigtimeout.tv_sec = 0;
617	sigtimeout.tv_nsec = 0;
618
619	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
620	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
621	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
622	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
623	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
624
625	pjdlog_info("Started successfully, running protocol version %d.",
626	    HAST_PROTO_VERSION);
627
628	for (;;) {
629		while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
630			switch (signo) {
631			case SIGINT:
632			case SIGTERM:
633				sigexit_received = true;
634				terminate_workers();
635				exit(EX_OK);
636				break;
637			case SIGCHLD:
638				child_exit();
639				break;
640			case SIGHUP:
641				hastd_reload();
642				break;
643			default:
644				assert(!"invalid condition");
645			}
646		}
647
648		/* Setup descriptors for select(2). */
649		FD_ZERO(&rfds);
650		maxfd = fd = proto_descriptor(cfg->hc_controlconn);
651		assert(fd >= 0);
652		FD_SET(fd, &rfds);
653		fd = proto_descriptor(cfg->hc_listenconn);
654		assert(fd >= 0);
655		FD_SET(fd, &rfds);
656		maxfd = fd > maxfd ? fd : maxfd;
657		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
658			if (res->hr_event == NULL)
659				continue;
660			fd = proto_descriptor(res->hr_event);
661			assert(fd >= 0);
662			FD_SET(fd, &rfds);
663			maxfd = fd > maxfd ? fd : maxfd;
664		}
665
666		assert(maxfd + 1 <= (int)FD_SETSIZE);
667		ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout);
668		if (ret == 0)
669			hook_check();
670		else if (ret == -1) {
671			if (errno == EINTR)
672				continue;
673			KEEP_ERRNO((void)pidfile_remove(pfh));
674			pjdlog_exit(EX_OSERR, "select() failed");
675		}
676
677		if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
678			control_handle(cfg);
679		if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
680			listen_accept();
681		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
682			if (res->hr_event == NULL)
683				continue;
684			if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
685				if (event_recv(res) == 0)
686					continue;
687				/* The worker process exited? */
688				proto_close(res->hr_event);
689				res->hr_event = NULL;
690			}
691		}
692	}
693}
694
/*
 * Signal handler that intentionally does nothing.  main() installs it
 * for SIGCHLD so that the signal has a handler and can be masked.
 */
static void
dummy_sighandler(int sig)
{

	/* The signal number is not needed. */
	(void)sig;
}
700
701int
702main(int argc, char *argv[])
703{
704	const char *pidfile;
705	pid_t otherpid;
706	bool foreground;
707	int debuglevel;
708	sigset_t mask;
709
710	foreground = false;
711	debuglevel = 0;
712	pidfile = HASTD_PIDFILE;
713
714	for (;;) {
715		int ch;
716
717		ch = getopt(argc, argv, "c:dFhP:");
718		if (ch == -1)
719			break;
720		switch (ch) {
721		case 'c':
722			cfgpath = optarg;
723			break;
724		case 'd':
725			debuglevel++;
726			break;
727		case 'F':
728			foreground = true;
729			break;
730		case 'P':
731			pidfile = optarg;
732			break;
733		case 'h':
734		default:
735			usage();
736		}
737	}
738	argc -= optind;
739	argv += optind;
740
741	pjdlog_debug_set(debuglevel);
742
743	g_gate_load();
744
745	pfh = pidfile_open(pidfile, 0600, &otherpid);
746	if (pfh == NULL) {
747		if (errno == EEXIST) {
748			pjdlog_exitx(EX_TEMPFAIL,
749			    "Another hastd is already running, pid: %jd.",
750			    (intmax_t)otherpid);
751		}
752		/* If we cannot create pidfile from other reasons, only warn. */
753		pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
754	}
755
756	cfg = yy_config_parse(cfgpath, true);
757	assert(cfg != NULL);
758
759	/*
760	 * Restore default actions for interesting signals in case parent
761	 * process (like init(8)) decided to ignore some of them (like SIGHUP).
762	 */
763	PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR);
764	PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR);
765	PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR);
766	/*
767	 * Because SIGCHLD is ignored by default, setup dummy handler for it,
768	 * so we can mask it.
769	 */
770	PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
771
772	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
773	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
774	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
775	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
776	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
777	PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
778
779	/* Listen on control address. */
780	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
781		KEEP_ERRNO((void)pidfile_remove(pfh));
782		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
783		    cfg->hc_controladdr);
784	}
785	/* Listen for remote connections. */
786	if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
787		KEEP_ERRNO((void)pidfile_remove(pfh));
788		pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
789		    cfg->hc_listenaddr);
790	}
791
792	if (!foreground) {
793		if (daemon(0, 0) < 0) {
794			KEEP_ERRNO((void)pidfile_remove(pfh));
795			pjdlog_exit(EX_OSERR, "Unable to daemonize");
796		}
797
798		/* Start logging to syslog. */
799		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
800
801		/* Write PID to a file. */
802		if (pidfile_write(pfh) < 0) {
803			pjdlog_errno(LOG_WARNING,
804			    "Unable to write PID to a file");
805		}
806	}
807
808	hook_init();
809
810	main_loop();
811
812	exit(0);
813}
814