hastd.c revision 212038
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This software was developed by Pawel Jakub Dawidek under sponsorship from
7 * the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 212038 2010-08-30 23:26:10Z pjd $");
33
34#include <sys/param.h>
35#include <sys/linker.h>
36#include <sys/module.h>
37#include <sys/wait.h>
38
39#include <assert.h>
40#include <err.h>
41#include <errno.h>
42#include <libutil.h>
43#include <signal.h>
44#include <stdbool.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <sysexits.h>
49#include <unistd.h>
50
51#include <activemap.h>
52#include <pjdlog.h>
53
54#include "control.h"
55#include "event.h"
56#include "hast.h"
57#include "hast_proto.h"
58#include "hastd.h"
59#include "hooks.h"
60#include "subr.h"
61
62/* Path to configuration file. */
63const char *cfgpath = HAST_CONFIG;
64/* Hastd configuration. */
65static struct hastd_config *cfg;
66/* Was SIGCHLD signal received? */
67bool sigchld_received = false;
68/* Was SIGHUP signal received? */
69bool sighup_received = false;
70/* Was SIGINT or SIGTERM signal received? */
71bool sigexit_received = false;
72/* PID file handle. */
73struct pidfh *pfh;
74
75/* How often check for hooks running for too long. */
76#define	REPORT_INTERVAL	10
77
/*
 * Print the command line synopsis via errx(3) and terminate the process
 * with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
84
85static void
86sighandler(int sig)
87{
88
89	switch (sig) {
90	case SIGINT:
91	case SIGTERM:
92		sigexit_received = true;
93		break;
94	case SIGCHLD:
95		sigchld_received = true;
96		break;
97	case SIGHUP:
98		sighup_received = true;
99		break;
100	default:
101		assert(!"invalid condition");
102	}
103}
104
/*
 * Make sure the g_gate module is present in the kernel, loading
 * geom_gate when it is not.  Exits with EX_OSERR when the module cannot
 * be made available, unless errno is EEXIST.
 */
static void
g_gate_load(void)
{

	/* Module already registered with the kernel. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present: load it and verify that it showed up. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated. */
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
119
/*
 * Log how a worker process terminated.
 *
 * pid    - PID of the worker that exited.
 * status - wait status as returned by wait3(2)/waitpid(2).
 *
 * A clean exit (code 0) is reported at debug level 1; death by signal
 * and any other exit are reported as errors.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFEXITED(status)) {
		exitcode = WEXITSTATUS(status);
		if (exitcode == 0) {
			pjdlog_debug(1,
			    "Worker process exited gracefully (pid=%u).", pid);
			return;
		}
	} else if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	} else {
		/* Neither exited nor signaled: no exit code to report. */
		exitcode = -1;
	}
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
135
136static void
137child_exit(void)
138{
139	struct hast_resource *res;
140	int status;
141	pid_t pid;
142
143	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
144		/* Find resource related to the process that just exited. */
145		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
146			if (pid == res->hr_workerpid)
147				break;
148		}
149		if (res == NULL) {
150			/*
151			 * This can happen when new connection arrives and we
152			 * cancel child responsible for the old one or if this
153			 * was hook which we executed.
154			 */
155			hook_check_one(pid, status);
156			continue;
157		}
158		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
159		    role2str(res->hr_role));
160		child_exit_log(pid, status);
161		proto_close(res->hr_ctrl);
162		res->hr_ctrl = NULL;
163		if (res->hr_event != NULL) {
164			proto_close(res->hr_event);
165			res->hr_event = NULL;
166		}
167		res->hr_workerpid = 0;
168		if (res->hr_role == HAST_ROLE_PRIMARY) {
169			/*
170			 * Restart child process if it was killed by signal
171			 * or exited because of temporary problem.
172			 */
173			if (WIFSIGNALED(status) ||
174			    (WIFEXITED(status) &&
175			     WEXITSTATUS(status) == EX_TEMPFAIL)) {
176				sleep(1);
177				pjdlog_info("Restarting worker process.");
178				hastd_primary(res);
179			} else {
180				res->hr_role = HAST_ROLE_INIT;
181				pjdlog_info("Changing resource role back to %s.",
182				    role2str(res->hr_role));
183			}
184		}
185		pjdlog_prefix_set("%s", "");
186	}
187}
188
189static bool
190resource_needs_restart(const struct hast_resource *res0,
191    const struct hast_resource *res1)
192{
193
194	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
195
196	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
197		return (true);
198	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
199		return (true);
200	if (res0->hr_role == HAST_ROLE_INIT ||
201	    res0->hr_role == HAST_ROLE_SECONDARY) {
202		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
203			return (true);
204		if (res0->hr_replication != res1->hr_replication)
205			return (true);
206		if (res0->hr_timeout != res1->hr_timeout)
207			return (true);
208		if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
209			return (true);
210	}
211	return (false);
212}
213
214static bool
215resource_needs_reload(const struct hast_resource *res0,
216    const struct hast_resource *res1)
217{
218
219	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
220	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
221	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
222
223	if (res0->hr_role != HAST_ROLE_PRIMARY)
224		return (false);
225
226	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
227		return (true);
228	if (res0->hr_replication != res1->hr_replication)
229		return (true);
230	if (res0->hr_timeout != res1->hr_timeout)
231		return (true);
232	if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
233		return (true);
234	return (false);
235}
236
237static void
238hastd_reload(void)
239{
240	struct hastd_config *newcfg;
241	struct hast_resource *nres, *cres, *tres;
242	uint8_t role;
243
244	pjdlog_info("Reloading configuration...");
245
246	newcfg = yy_config_parse(cfgpath, false);
247	if (newcfg == NULL)
248		goto failed;
249
250	/*
251	 * Check if control address has changed.
252	 */
253	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
254		if (proto_server(newcfg->hc_controladdr,
255		    &newcfg->hc_controlconn) < 0) {
256			pjdlog_errno(LOG_ERR,
257			    "Unable to listen on control address %s",
258			    newcfg->hc_controladdr);
259			goto failed;
260		}
261	}
262	/*
263	 * Check if listen address has changed.
264	 */
265	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
266		if (proto_server(newcfg->hc_listenaddr,
267		    &newcfg->hc_listenconn) < 0) {
268			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
269			    newcfg->hc_listenaddr);
270			goto failed;
271		}
272	}
273	/*
274	 * Only when both control and listen sockets are successfully
275	 * initialized switch them to new configuration.
276	 */
277	if (newcfg->hc_controlconn != NULL) {
278		pjdlog_info("Control socket changed from %s to %s.",
279		    cfg->hc_controladdr, newcfg->hc_controladdr);
280		proto_close(cfg->hc_controlconn);
281		cfg->hc_controlconn = newcfg->hc_controlconn;
282		newcfg->hc_controlconn = NULL;
283		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
284		    sizeof(cfg->hc_controladdr));
285	}
286	if (newcfg->hc_listenconn != NULL) {
287		pjdlog_info("Listen socket changed from %s to %s.",
288		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
289		proto_close(cfg->hc_listenconn);
290		cfg->hc_listenconn = newcfg->hc_listenconn;
291		newcfg->hc_listenconn = NULL;
292		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
293		    sizeof(cfg->hc_listenaddr));
294	}
295
296	/*
297	 * Stop and remove resources that were removed from the configuration.
298	 */
299	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
300		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
301			if (strcmp(cres->hr_name, nres->hr_name) == 0)
302				break;
303		}
304		if (nres == NULL) {
305			control_set_role(cres, HAST_ROLE_INIT);
306			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
307			pjdlog_info("Resource %s removed.", cres->hr_name);
308			free(cres);
309		}
310	}
311	/*
312	 * Move new resources to the current configuration.
313	 */
314	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
315		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
316			if (strcmp(cres->hr_name, nres->hr_name) == 0)
317				break;
318		}
319		if (cres == NULL) {
320			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
321			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
322			pjdlog_info("Resource %s added.", nres->hr_name);
323		}
324	}
325	/*
326	 * Deal with modified resources.
327	 * Depending on what has changed exactly we might want to perform
328	 * different actions.
329	 *
330	 * We do full resource restart in the following situations:
331	 * Resource role is INIT or SECONDARY.
332	 * Resource role is PRIMARY and path to local component or provider
333	 * name has changed.
334	 * In case of PRIMARY, the worker process will be killed and restarted,
335	 * which also means removing /dev/hast/<name> provider and
336	 * recreating it.
337	 *
338	 * We do just reload (send SIGHUP to worker process) if we act as
339	 * PRIMARY, but only remote address, replication mode and timeout
340	 * has changed. For those, there is no need to restart worker process.
341	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
342	 * replication mode has changed or simply set new timeout if only
343	 * timeout has changed.
344	 */
345	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
346		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
347			if (strcmp(cres->hr_name, nres->hr_name) == 0)
348				break;
349		}
350		assert(cres != NULL);
351		if (resource_needs_restart(cres, nres)) {
352			pjdlog_info("Resource %s configuration was modified, restarting it.",
353			    cres->hr_name);
354			role = cres->hr_role;
355			control_set_role(cres, HAST_ROLE_INIT);
356			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
357			free(cres);
358			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
359			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
360			control_set_role(nres, role);
361		} else if (resource_needs_reload(cres, nres)) {
362			pjdlog_info("Resource %s configuration was modified, reloading it.",
363			    cres->hr_name);
364			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
365			    sizeof(cres->hr_remoteaddr));
366			cres->hr_replication = nres->hr_replication;
367			cres->hr_timeout = nres->hr_timeout;
368			if (cres->hr_workerpid != 0) {
369				if (kill(cres->hr_workerpid, SIGHUP) < 0) {
370					pjdlog_errno(LOG_WARNING,
371					    "Unable to send SIGHUP to worker process %u",
372					    (unsigned int)cres->hr_workerpid);
373				}
374			}
375		}
376	}
377
378	yy_config_free(newcfg);
379	pjdlog_info("Configuration reloaded successfully.");
380	return;
381failed:
382	if (newcfg != NULL) {
383		if (newcfg->hc_controlconn != NULL)
384			proto_close(newcfg->hc_controlconn);
385		if (newcfg->hc_listenconn != NULL)
386			proto_close(newcfg->hc_listenconn);
387		yy_config_free(newcfg);
388	}
389	pjdlog_warning("Configuration not reloaded.");
390}
391
392static void
393terminate_workers(void)
394{
395	struct hast_resource *res;
396
397	pjdlog_info("Termination signal received, exiting.");
398	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
399		if (res->hr_workerpid == 0)
400			continue;
401		pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).",
402		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
403		if (kill(res->hr_workerpid, SIGTERM) == 0)
404			continue;
405		pjdlog_errno(LOG_WARNING,
406		    "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).",
407		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
408	}
409}
410
411static void
412listen_accept(void)
413{
414	struct hast_resource *res;
415	struct proto_conn *conn;
416	struct nv *nvin, *nvout, *nverr;
417	const char *resname;
418	const unsigned char *token;
419	char laddr[256], raddr[256];
420	size_t size;
421	pid_t pid;
422	int status;
423
424	proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
425	pjdlog_debug(1, "Accepting connection to %s.", laddr);
426
427	if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
428		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
429		return;
430	}
431
432	proto_local_address(conn, laddr, sizeof(laddr));
433	proto_remote_address(conn, raddr, sizeof(raddr));
434	pjdlog_info("Connection from %s to %s.", raddr, laddr);
435
436	/* Error in setting timeout is not critical, but why should it fail? */
437	if (proto_timeout(conn, HAST_TIMEOUT) < 0)
438		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
439
440	nvin = nvout = nverr = NULL;
441
442	/*
443	 * Before receiving any data see if remote host have access to any
444	 * resource.
445	 */
446	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
447		if (proto_address_match(conn, res->hr_remoteaddr))
448			break;
449	}
450	if (res == NULL) {
451		pjdlog_error("Client %s isn't known.", raddr);
452		goto close;
453	}
454	/* Ok, remote host can access at least one resource. */
455
456	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
457		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
458		    raddr);
459		goto close;
460	}
461
462	resname = nv_get_string(nvin, "resource");
463	if (resname == NULL) {
464		pjdlog_error("No 'resource' field in the header received from %s.",
465		    raddr);
466		goto close;
467	}
468	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
469	token = nv_get_uint8_array(nvin, &size, "token");
470	/*
471	 * NULL token means that this is first conection.
472	 */
473	if (token != NULL && size != sizeof(res->hr_token)) {
474		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
475		    raddr, sizeof(res->hr_token), size);
476		goto close;
477	}
478
479	/*
480	 * From now on we want to send errors to the remote node.
481	 */
482	nverr = nv_alloc();
483
484	/* Find resource related to this connection. */
485	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
486		if (strcmp(resname, res->hr_name) == 0)
487			break;
488	}
489	/* Have we found the resource? */
490	if (res == NULL) {
491		pjdlog_error("No resource '%s' as requested by %s.",
492		    resname, raddr);
493		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
494		goto fail;
495	}
496
497	/* Now that we know resource name setup log prefix. */
498	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
499
500	/* Does the remote host have access to this resource? */
501	if (!proto_address_match(conn, res->hr_remoteaddr)) {
502		pjdlog_error("Client %s has no access to the resource.", raddr);
503		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
504		goto fail;
505	}
506	/* Is the resource marked as secondary? */
507	if (res->hr_role != HAST_ROLE_SECONDARY) {
508		pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
509		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
510		    raddr);
511		nv_add_stringf(nverr, "errmsg",
512		    "Remote node acts as %s for the resource and not as %s.",
513		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
514		goto fail;
515	}
516	/* Does token (if exists) match? */
517	if (token != NULL && memcmp(token, res->hr_token,
518	    sizeof(res->hr_token)) != 0) {
519		pjdlog_error("Token received from %s doesn't match.", raddr);
520		nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
521		goto fail;
522	}
523	/*
524	 * If there is no token, but we have half-open connection
525	 * (only remotein) or full connection (worker process is running)
526	 * we have to cancel those and accept the new connection.
527	 */
528	if (token == NULL) {
529		assert(res->hr_remoteout == NULL);
530		pjdlog_debug(1, "Initial connection from %s.", raddr);
531		if (res->hr_workerpid != 0) {
532			assert(res->hr_remotein == NULL);
533			pjdlog_debug(1,
534			    "Worker process exists (pid=%u), stopping it.",
535			    (unsigned int)res->hr_workerpid);
536			/* Stop child process. */
537			if (kill(res->hr_workerpid, SIGINT) < 0) {
538				pjdlog_errno(LOG_ERR,
539				    "Unable to stop worker process (pid=%u)",
540				    (unsigned int)res->hr_workerpid);
541				/*
542				 * Other than logging the problem we
543				 * ignore it - nothing smart to do.
544				 */
545			}
546			/* Wait for it to exit. */
547			else if ((pid = waitpid(res->hr_workerpid,
548			    &status, 0)) != res->hr_workerpid) {
549				/* We can only log the problem. */
550				pjdlog_errno(LOG_ERR,
551				    "Waiting for worker process (pid=%u) failed",
552				    (unsigned int)res->hr_workerpid);
553			} else {
554				child_exit_log(res->hr_workerpid, status);
555			}
556			res->hr_workerpid = 0;
557		} else if (res->hr_remotein != NULL) {
558			char oaddr[256];
559
560			proto_remote_address(conn, oaddr, sizeof(oaddr));
561			pjdlog_debug(1,
562			    "Canceling half-open connection from %s on connection from %s.",
563			    oaddr, raddr);
564			proto_close(res->hr_remotein);
565			res->hr_remotein = NULL;
566		}
567	}
568
569	/*
570	 * Checks and cleanups are done.
571	 */
572
573	if (token == NULL) {
574		arc4random_buf(res->hr_token, sizeof(res->hr_token));
575		nvout = nv_alloc();
576		nv_add_uint8_array(nvout, res->hr_token,
577		    sizeof(res->hr_token), "token");
578		if (nv_error(nvout) != 0) {
579			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
580			    "Unable to prepare return header for %s", raddr);
581			nv_add_stringf(nverr, "errmsg",
582			    "Remote node was unable to prepare return header: %s.",
583			    strerror(nv_error(nvout)));
584			goto fail;
585		}
586		if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
587			int error = errno;
588
589			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
590			    raddr);
591			nv_add_stringf(nverr, "errmsg",
592			    "Remote node was unable to send response: %s.",
593			    strerror(error));
594			goto fail;
595		}
596		res->hr_remotein = conn;
597		pjdlog_debug(1, "Incoming connection from %s configured.",
598		    raddr);
599	} else {
600		res->hr_remoteout = conn;
601		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
602		hastd_secondary(res, nvin);
603	}
604	nv_free(nvin);
605	nv_free(nvout);
606	nv_free(nverr);
607	pjdlog_prefix_set("%s", "");
608	return;
609fail:
610	if (nv_error(nverr) != 0) {
611		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
612		    "Unable to prepare error header for %s", raddr);
613		goto close;
614	}
615	if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
616		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
617		goto close;
618	}
619close:
620	if (nvin != NULL)
621		nv_free(nvin);
622	if (nvout != NULL)
623		nv_free(nvout);
624	if (nverr != NULL)
625		nv_free(nverr);
626	proto_close(conn);
627	pjdlog_prefix_set("%s", "");
628}
629
630static void
631main_loop(void)
632{
633	struct hast_resource *res;
634	struct timeval timeout;
635	int fd, maxfd, ret;
636	fd_set rfds;
637
638	timeout.tv_sec = REPORT_INTERVAL;
639	timeout.tv_usec = 0;
640
641	for (;;) {
642		if (sigexit_received) {
643			sigexit_received = false;
644			terminate_workers();
645			exit(EX_OK);
646		}
647		if (sigchld_received) {
648			sigchld_received = false;
649			child_exit();
650		}
651		if (sighup_received) {
652			sighup_received = false;
653			hastd_reload();
654		}
655
656		/* Setup descriptors for select(2). */
657		FD_ZERO(&rfds);
658		maxfd = fd = proto_descriptor(cfg->hc_controlconn);
659		FD_SET(fd, &rfds);
660		fd = proto_descriptor(cfg->hc_listenconn);
661		FD_SET(fd, &rfds);
662		maxfd = fd > maxfd ? fd : maxfd;
663		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
664			if (res->hr_event == NULL)
665				continue;
666			fd = proto_descriptor(res->hr_event);
667			FD_SET(fd, &rfds);
668			maxfd = fd > maxfd ? fd : maxfd;
669		}
670
671		ret = select(maxfd + 1, &rfds, NULL, NULL, &timeout);
672		if (ret == 0)
673			hook_check(false);
674		else if (ret == -1) {
675			if (errno == EINTR)
676				continue;
677			KEEP_ERRNO((void)pidfile_remove(pfh));
678			pjdlog_exit(EX_OSERR, "select() failed");
679		}
680
681		if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
682			control_handle(cfg);
683		if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
684			listen_accept();
685		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
686			if (res->hr_event == NULL)
687				continue;
688			if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
689				if (event_recv(res) == 0)
690					continue;
691				/* The worker process exited? */
692				proto_close(res->hr_event);
693				res->hr_event = NULL;
694			}
695		}
696	}
697}
698
699int
700main(int argc, char *argv[])
701{
702	const char *pidfile;
703	pid_t otherpid;
704	bool foreground;
705	int debuglevel;
706
707	g_gate_load();
708
709	foreground = false;
710	debuglevel = 0;
711	pidfile = HASTD_PIDFILE;
712
713	for (;;) {
714		int ch;
715
716		ch = getopt(argc, argv, "c:dFhP:");
717		if (ch == -1)
718			break;
719		switch (ch) {
720		case 'c':
721			cfgpath = optarg;
722			break;
723		case 'd':
724			debuglevel++;
725			break;
726		case 'F':
727			foreground = true;
728			break;
729		case 'P':
730			pidfile = optarg;
731			break;
732		case 'h':
733		default:
734			usage();
735		}
736	}
737	argc -= optind;
738	argv += optind;
739
740	pjdlog_debug_set(debuglevel);
741
742	pfh = pidfile_open(pidfile, 0600, &otherpid);
743	if (pfh == NULL) {
744		if (errno == EEXIST) {
745			pjdlog_exitx(EX_TEMPFAIL,
746			    "Another hastd is already running, pid: %jd.",
747			    (intmax_t)otherpid);
748		}
749		/* If we cannot create pidfile from other reasons, only warn. */
750		pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
751	}
752
753	cfg = yy_config_parse(cfgpath, true);
754	assert(cfg != NULL);
755
756	signal(SIGINT, sighandler);
757	signal(SIGTERM, sighandler);
758	signal(SIGHUP, sighandler);
759	signal(SIGCHLD, sighandler);
760
761	/* Listen on control address. */
762	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
763		KEEP_ERRNO((void)pidfile_remove(pfh));
764		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
765		    cfg->hc_controladdr);
766	}
767	/* Listen for remote connections. */
768	if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
769		KEEP_ERRNO((void)pidfile_remove(pfh));
770		pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
771		    cfg->hc_listenaddr);
772	}
773
774	if (!foreground) {
775		if (daemon(0, 0) < 0) {
776			KEEP_ERRNO((void)pidfile_remove(pfh));
777			pjdlog_exit(EX_OSERR, "Unable to daemonize");
778		}
779
780		/* Start logging to syslog. */
781		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
782
783		/* Write PID to a file. */
784		if (pidfile_write(pfh) < 0) {
785			pjdlog_errno(LOG_WARNING,
786			    "Unable to write PID to a file");
787		}
788	}
789
790	hook_init();
791
792	main_loop();
793
794	exit(0);
795}
796