hastd.c revision 218374
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This software was developed by Pawel Jakub Dawidek under sponsorship from
7 * the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 218374 2011-02-06 14:07:58Z pjd $");
33
#include <sys/param.h>
#include <sys/linker.h>
#include <sys/module.h>
#include <sys/stat.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <libutil.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <time.h>
#include <unistd.h>
50
51#include <activemap.h>
52#include <pjdlog.h>
53
54#include "control.h"
55#include "event.h"
56#include "hast.h"
57#include "hast_proto.h"
58#include "hastd.h"
59#include "hooks.h"
60#include "subr.h"
61
62/* Path to configuration file. */
63const char *cfgpath = HAST_CONFIG;
64/* Hastd configuration. */
65static struct hastd_config *cfg;
66/* Was SIGINT or SIGTERM signal received? */
67bool sigexit_received = false;
68/* PID file handle. */
69struct pidfh *pfh;
70
/* How often (in seconds) to check for hooks that have been running for too long. */
72#define	REPORT_INTERVAL	5
73
/*
 * Print the command-line synopsis through errx(3) and terminate with
 * the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
80
/*
 * Make sure the "g_gate" module is present in the kernel, trying to load
 * "geom_gate" when it is not.  If it is still missing afterwards and the
 * failure is anything other than EEXIST, exit with EX_OSERR.
 */
static void
g_gate_load(void)
{

	/* Already present in the kernel, nothing to do. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present, try loading it and look it up again. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is deliberately not treated as an error. */
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
95
96void
97descriptors_cleanup(struct hast_resource *res)
98{
99	struct hast_resource *tres;
100
101	TAILQ_FOREACH(tres, &cfg->hc_resources, hr_next) {
102		if (tres == res) {
103			PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY ||
104			    (res->hr_remotein == NULL &&
105			     res->hr_remoteout == NULL));
106			continue;
107		}
108		if (tres->hr_remotein != NULL)
109			proto_close(tres->hr_remotein);
110		if (tres->hr_remoteout != NULL)
111			proto_close(tres->hr_remoteout);
112		if (tres->hr_ctrl != NULL)
113			proto_close(tres->hr_ctrl);
114		if (tres->hr_event != NULL)
115			proto_close(tres->hr_event);
116		if (tres->hr_conn != NULL)
117			proto_close(tres->hr_conn);
118	}
119	if (cfg->hc_controlin != NULL)
120		proto_close(cfg->hc_controlin);
121	proto_close(cfg->hc_controlconn);
122	proto_close(cfg->hc_listenconn);
123	(void)pidfile_close(pfh);
124	hook_fini();
125	pjdlog_fini();
126}
127
/*
 * Translate the file type encoded in 'mode' into a human readable name.
 * Returns a pointer to a constant string; "unknown" is returned when
 * none of the tested file types matches.
 */
static const char *
dtype2str(mode_t mode)
{

	return (S_ISBLK(mode) ? "block device" :
	    S_ISCHR(mode) ? "character device" :
	    S_ISDIR(mode) ? "directory" :
	    S_ISFIFO(mode) ? "pipe or FIFO" :
	    S_ISLNK(mode) ? "symbolic link" :
	    S_ISREG(mode) ? "regular file" :
	    S_ISSOCK(mode) ? "socket" :
	    S_ISWHT(mode) ? "whiteout" :
	    "unknown");
}
151
152void
153descriptors_assert(const struct hast_resource *res, int pjdlogmode)
154{
155	char msg[256];
156	struct stat sb;
157	long maxfd;
158	bool isopen;
159	mode_t mode;
160	int fd;
161
162	/*
163	 * At this point descriptor to syslog socket is closed, so if we want
164	 * to log assertion message, we have to first store it in 'msg' local
165	 * buffer and then open syslog socket and log it.
166	 */
167	msg[0] = '\0';
168
169	maxfd = sysconf(_SC_OPEN_MAX);
170	if (maxfd < 0) {
171		pjdlog_init(pjdlogmode);
172		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
173		    role2str(res->hr_role));
174		pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed");
175		pjdlog_fini();
176		maxfd = 16384;
177	}
178	for (fd = 0; fd <= maxfd; fd++) {
179		if (fstat(fd, &sb) == 0) {
180			isopen = true;
181			mode = sb.st_mode;
182		} else if (errno == EBADF) {
183			isopen = false;
184			mode = 0;
185		} else {
186			isopen = true;	/* silence gcc */
187			mode = 0;	/* silence gcc */
188			snprintf(msg, sizeof(msg),
189			    "Unable to fstat descriptor %d: %s", fd,
190			    strerror(errno));
191			break;
192		}
193		if (fd == STDIN_FILENO || fd == STDOUT_FILENO ||
194		    fd == STDERR_FILENO) {
195			if (!isopen) {
196				snprintf(msg, sizeof(msg),
197				    "Descriptor %d (%s) is closed, but should be open.",
198				    fd, (fd == STDIN_FILENO ? "stdin" :
199				    (fd == STDOUT_FILENO ? "stdout" : "stderr")));
200				break;
201			}
202		} else if (fd == proto_descriptor(res->hr_event)) {
203			if (!isopen) {
204				snprintf(msg, sizeof(msg),
205				    "Descriptor %d (event) is closed, but should be open.",
206				    fd);
207				break;
208			}
209			if (!S_ISSOCK(mode)) {
210				snprintf(msg, sizeof(msg),
211				    "Descriptor %d (event) is %s, but should be %s.",
212				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
213				break;
214			}
215		} else if (fd == proto_descriptor(res->hr_ctrl)) {
216			if (!isopen) {
217				snprintf(msg, sizeof(msg),
218				    "Descriptor %d (ctrl) is closed, but should be open.",
219				    fd);
220				break;
221			}
222			if (!S_ISSOCK(mode)) {
223				snprintf(msg, sizeof(msg),
224				    "Descriptor %d (ctrl) is %s, but should be %s.",
225				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
226				break;
227			}
228		} else if (fd == proto_descriptor(res->hr_conn)) {
229			if (!isopen) {
230				snprintf(msg, sizeof(msg),
231				    "Descriptor %d (conn) is closed, but should be open.",
232				    fd);
233				break;
234			}
235			if (!S_ISSOCK(mode)) {
236				snprintf(msg, sizeof(msg),
237				    "Descriptor %d (conn) is %s, but should be %s.",
238				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
239				break;
240			}
241		} else if (res->hr_role == HAST_ROLE_SECONDARY &&
242		    fd == proto_descriptor(res->hr_remotein)) {
243			if (!isopen) {
244				snprintf(msg, sizeof(msg),
245				    "Descriptor %d (remote in) is closed, but should be open.",
246				    fd);
247				break;
248			}
249			if (!S_ISSOCK(mode)) {
250				snprintf(msg, sizeof(msg),
251				    "Descriptor %d (remote in) is %s, but should be %s.",
252				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
253				break;
254			}
255		} else if (res->hr_role == HAST_ROLE_SECONDARY &&
256		    fd == proto_descriptor(res->hr_remoteout)) {
257			if (!isopen) {
258				snprintf(msg, sizeof(msg),
259				    "Descriptor %d (remote out) is closed, but should be open.",
260				    fd);
261				break;
262			}
263			if (!S_ISSOCK(mode)) {
264				snprintf(msg, sizeof(msg),
265				    "Descriptor %d (remote out) is %s, but should be %s.",
266				    fd, dtype2str(mode), dtype2str(S_IFSOCK));
267				break;
268			}
269		} else {
270			if (isopen) {
271				snprintf(msg, sizeof(msg),
272				    "Descriptor %d is open (%s), but should be closed.",
273				    fd, dtype2str(mode));
274				break;
275			}
276		}
277	}
278	if (msg[0] != '\0') {
279		pjdlog_init(pjdlogmode);
280		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
281		    role2str(res->hr_role));
282		PJDLOG_ABORT("%s", msg);
283	}
284}
285
/*
 * Log how the worker process 'pid' ended, based on its wait 'status'.
 * A zero exit code is logged at debug level 1; termination by a signal
 * and any other kind of exit are logged as errors.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		pjdlog_debug(1, "Worker process exited gracefully (pid=%u).",
		    pid);
		return;
	}
	if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	}
	/* -1 is reported when the process did not exit normally. */
	exitcode = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
301
302static void
303child_exit(void)
304{
305	struct hast_resource *res;
306	int status;
307	pid_t pid;
308
309	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
310		/* Find resource related to the process that just exited. */
311		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
312			if (pid == res->hr_workerpid)
313				break;
314		}
315		if (res == NULL) {
316			/*
317			 * This can happen when new connection arrives and we
318			 * cancel child responsible for the old one or if this
319			 * was hook which we executed.
320			 */
321			hook_check_one(pid, status);
322			continue;
323		}
324		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
325		    role2str(res->hr_role));
326		child_exit_log(pid, status);
327		child_cleanup(res);
328		if (res->hr_role == HAST_ROLE_PRIMARY) {
329			/*
330			 * Restart child process if it was killed by signal
331			 * or exited because of temporary problem.
332			 */
333			if (WIFSIGNALED(status) ||
334			    (WIFEXITED(status) &&
335			     WEXITSTATUS(status) == EX_TEMPFAIL)) {
336				sleep(1);
337				pjdlog_info("Restarting worker process.");
338				hastd_primary(res);
339			} else {
340				res->hr_role = HAST_ROLE_INIT;
341				pjdlog_info("Changing resource role back to %s.",
342				    role2str(res->hr_role));
343			}
344		}
345		pjdlog_prefix_set("%s", "");
346	}
347}
348
349static bool
350resource_needs_restart(const struct hast_resource *res0,
351    const struct hast_resource *res1)
352{
353
354	PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0);
355
356	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
357		return (true);
358	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
359		return (true);
360	if (res0->hr_role == HAST_ROLE_INIT ||
361	    res0->hr_role == HAST_ROLE_SECONDARY) {
362		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
363			return (true);
364		if (res0->hr_replication != res1->hr_replication)
365			return (true);
366		if (res0->hr_timeout != res1->hr_timeout)
367			return (true);
368		if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
369			return (true);
370	}
371	return (false);
372}
373
374static bool
375resource_needs_reload(const struct hast_resource *res0,
376    const struct hast_resource *res1)
377{
378
379	PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0);
380	PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0);
381	PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
382
383	if (res0->hr_role != HAST_ROLE_PRIMARY)
384		return (false);
385
386	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
387		return (true);
388	if (res0->hr_replication != res1->hr_replication)
389		return (true);
390	if (res0->hr_timeout != res1->hr_timeout)
391		return (true);
392	if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
393		return (true);
394	return (false);
395}
396
397static void
398resource_reload(const struct hast_resource *res)
399{
400	struct nv *nvin, *nvout;
401	int error;
402
403	PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY);
404
405	nvout = nv_alloc();
406	nv_add_uint8(nvout, HASTCTL_RELOAD, "cmd");
407	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr");
408	nv_add_int32(nvout, (int32_t)res->hr_replication, "replication");
409	nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout");
410	nv_add_string(nvout, res->hr_exec, "exec");
411	if (nv_error(nvout) != 0) {
412		nv_free(nvout);
413		pjdlog_error("Unable to allocate header for reload message.");
414		return;
415	}
416	if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) {
417		pjdlog_errno(LOG_ERR, "Unable to send reload message");
418		nv_free(nvout);
419		return;
420	}
421	nv_free(nvout);
422
423	/* Receive response. */
424	if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
425		pjdlog_errno(LOG_ERR, "Unable to receive reload reply");
426		return;
427	}
428	error = nv_get_int16(nvin, "error");
429	nv_free(nvin);
430	if (error != 0) {
431		pjdlog_common(LOG_ERR, 0, error, "Reload failed");
432		return;
433	}
434}
435
436static void
437hastd_reload(void)
438{
439	struct hastd_config *newcfg;
440	struct hast_resource *nres, *cres, *tres;
441	uint8_t role;
442
443	pjdlog_info("Reloading configuration...");
444
445	newcfg = yy_config_parse(cfgpath, false);
446	if (newcfg == NULL)
447		goto failed;
448
449	/*
450	 * Check if control address has changed.
451	 */
452	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
453		if (proto_server(newcfg->hc_controladdr,
454		    &newcfg->hc_controlconn) < 0) {
455			pjdlog_errno(LOG_ERR,
456			    "Unable to listen on control address %s",
457			    newcfg->hc_controladdr);
458			goto failed;
459		}
460	}
461	/*
462	 * Check if listen address has changed.
463	 */
464	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
465		if (proto_server(newcfg->hc_listenaddr,
466		    &newcfg->hc_listenconn) < 0) {
467			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
468			    newcfg->hc_listenaddr);
469			goto failed;
470		}
471	}
472	/*
473	 * Only when both control and listen sockets are successfully
474	 * initialized switch them to new configuration.
475	 */
476	if (newcfg->hc_controlconn != NULL) {
477		pjdlog_info("Control socket changed from %s to %s.",
478		    cfg->hc_controladdr, newcfg->hc_controladdr);
479		proto_close(cfg->hc_controlconn);
480		cfg->hc_controlconn = newcfg->hc_controlconn;
481		newcfg->hc_controlconn = NULL;
482		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
483		    sizeof(cfg->hc_controladdr));
484	}
485	if (newcfg->hc_listenconn != NULL) {
486		pjdlog_info("Listen socket changed from %s to %s.",
487		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
488		proto_close(cfg->hc_listenconn);
489		cfg->hc_listenconn = newcfg->hc_listenconn;
490		newcfg->hc_listenconn = NULL;
491		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
492		    sizeof(cfg->hc_listenaddr));
493	}
494
495	/*
496	 * Stop and remove resources that were removed from the configuration.
497	 */
498	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
499		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
500			if (strcmp(cres->hr_name, nres->hr_name) == 0)
501				break;
502		}
503		if (nres == NULL) {
504			control_set_role(cres, HAST_ROLE_INIT);
505			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
506			pjdlog_info("Resource %s removed.", cres->hr_name);
507			free(cres);
508		}
509	}
510	/*
511	 * Move new resources to the current configuration.
512	 */
513	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
514		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
515			if (strcmp(cres->hr_name, nres->hr_name) == 0)
516				break;
517		}
518		if (cres == NULL) {
519			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
520			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
521			pjdlog_info("Resource %s added.", nres->hr_name);
522		}
523	}
524	/*
525	 * Deal with modified resources.
526	 * Depending on what has changed exactly we might want to perform
527	 * different actions.
528	 *
529	 * We do full resource restart in the following situations:
530	 * Resource role is INIT or SECONDARY.
531	 * Resource role is PRIMARY and path to local component or provider
532	 * name has changed.
533	 * In case of PRIMARY, the worker process will be killed and restarted,
534	 * which also means removing /dev/hast/<name> provider and
535	 * recreating it.
536	 *
537	 * We do just reload (send SIGHUP to worker process) if we act as
538	 * PRIMARY, but only if remote address, replication mode, timeout or
539	 * execution path has changed. For those, there is no need to restart
540	 * worker process.
541	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
542	 * replication mode has changed or simply set new timeout if only
543	 * timeout has changed.
544	 */
545	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
546		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
547			if (strcmp(cres->hr_name, nres->hr_name) == 0)
548				break;
549		}
550		PJDLOG_ASSERT(cres != NULL);
551		if (resource_needs_restart(cres, nres)) {
552			pjdlog_info("Resource %s configuration was modified, restarting it.",
553			    cres->hr_name);
554			role = cres->hr_role;
555			control_set_role(cres, HAST_ROLE_INIT);
556			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
557			free(cres);
558			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
559			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
560			control_set_role(nres, role);
561		} else if (resource_needs_reload(cres, nres)) {
562			pjdlog_info("Resource %s configuration was modified, reloading it.",
563			    cres->hr_name);
564			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
565			    sizeof(cres->hr_remoteaddr));
566			cres->hr_replication = nres->hr_replication;
567			cres->hr_timeout = nres->hr_timeout;
568			strlcpy(cres->hr_exec, nres->hr_exec,
569			    sizeof(cres->hr_exec));
570			if (cres->hr_workerpid != 0)
571				resource_reload(cres);
572		}
573	}
574
575	yy_config_free(newcfg);
576	pjdlog_info("Configuration reloaded successfully.");
577	return;
578failed:
579	if (newcfg != NULL) {
580		if (newcfg->hc_controlconn != NULL)
581			proto_close(newcfg->hc_controlconn);
582		if (newcfg->hc_listenconn != NULL)
583			proto_close(newcfg->hc_listenconn);
584		yy_config_free(newcfg);
585	}
586	pjdlog_warning("Configuration not reloaded.");
587}
588
589static void
590terminate_workers(void)
591{
592	struct hast_resource *res;
593
594	pjdlog_info("Termination signal received, exiting.");
595	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
596		if (res->hr_workerpid == 0)
597			continue;
598		pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).",
599		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
600		if (kill(res->hr_workerpid, SIGTERM) == 0)
601			continue;
602		pjdlog_errno(LOG_WARNING,
603		    "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).",
604		    res->hr_name, role2str(res->hr_role), res->hr_workerpid);
605	}
606}
607
608static void
609listen_accept(void)
610{
611	struct hast_resource *res;
612	struct proto_conn *conn;
613	struct nv *nvin, *nvout, *nverr;
614	const char *resname;
615	const unsigned char *token;
616	char laddr[256], raddr[256];
617	size_t size;
618	pid_t pid;
619	int status;
620
621	proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
622	pjdlog_debug(1, "Accepting connection to %s.", laddr);
623
624	if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
625		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
626		return;
627	}
628
629	proto_local_address(conn, laddr, sizeof(laddr));
630	proto_remote_address(conn, raddr, sizeof(raddr));
631	pjdlog_info("Connection from %s to %s.", raddr, laddr);
632
633	/* Error in setting timeout is not critical, but why should it fail? */
634	if (proto_timeout(conn, HAST_TIMEOUT) < 0)
635		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
636
637	nvin = nvout = nverr = NULL;
638
639	/*
640	 * Before receiving any data see if remote host have access to any
641	 * resource.
642	 */
643	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
644		if (proto_address_match(conn, res->hr_remoteaddr))
645			break;
646	}
647	if (res == NULL) {
648		pjdlog_error("Client %s isn't known.", raddr);
649		goto close;
650	}
651	/* Ok, remote host can access at least one resource. */
652
653	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
654		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
655		    raddr);
656		goto close;
657	}
658
659	resname = nv_get_string(nvin, "resource");
660	if (resname == NULL) {
661		pjdlog_error("No 'resource' field in the header received from %s.",
662		    raddr);
663		goto close;
664	}
665	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
666	token = nv_get_uint8_array(nvin, &size, "token");
667	/*
668	 * NULL token means that this is first conection.
669	 */
670	if (token != NULL && size != sizeof(res->hr_token)) {
671		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
672		    raddr, sizeof(res->hr_token), size);
673		goto close;
674	}
675
676	/*
677	 * From now on we want to send errors to the remote node.
678	 */
679	nverr = nv_alloc();
680
681	/* Find resource related to this connection. */
682	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
683		if (strcmp(resname, res->hr_name) == 0)
684			break;
685	}
686	/* Have we found the resource? */
687	if (res == NULL) {
688		pjdlog_error("No resource '%s' as requested by %s.",
689		    resname, raddr);
690		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
691		goto fail;
692	}
693
694	/* Now that we know resource name setup log prefix. */
695	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
696
697	/* Does the remote host have access to this resource? */
698	if (!proto_address_match(conn, res->hr_remoteaddr)) {
699		pjdlog_error("Client %s has no access to the resource.", raddr);
700		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
701		goto fail;
702	}
703	/* Is the resource marked as secondary? */
704	if (res->hr_role != HAST_ROLE_SECONDARY) {
705		pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
706		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
707		    raddr);
708		nv_add_stringf(nverr, "errmsg",
709		    "Remote node acts as %s for the resource and not as %s.",
710		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
711		goto fail;
712	}
713	/* Does token (if exists) match? */
714	if (token != NULL && memcmp(token, res->hr_token,
715	    sizeof(res->hr_token)) != 0) {
716		pjdlog_error("Token received from %s doesn't match.", raddr);
717		nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
718		goto fail;
719	}
720	/*
721	 * If there is no token, but we have half-open connection
722	 * (only remotein) or full connection (worker process is running)
723	 * we have to cancel those and accept the new connection.
724	 */
725	if (token == NULL) {
726		PJDLOG_ASSERT(res->hr_remoteout == NULL);
727		pjdlog_debug(1, "Initial connection from %s.", raddr);
728		if (res->hr_workerpid != 0) {
729			PJDLOG_ASSERT(res->hr_remotein == NULL);
730			pjdlog_debug(1,
731			    "Worker process exists (pid=%u), stopping it.",
732			    (unsigned int)res->hr_workerpid);
733			/* Stop child process. */
734			if (kill(res->hr_workerpid, SIGINT) < 0) {
735				pjdlog_errno(LOG_ERR,
736				    "Unable to stop worker process (pid=%u)",
737				    (unsigned int)res->hr_workerpid);
738				/*
739				 * Other than logging the problem we
740				 * ignore it - nothing smart to do.
741				 */
742			}
743			/* Wait for it to exit. */
744			else if ((pid = waitpid(res->hr_workerpid,
745			    &status, 0)) != res->hr_workerpid) {
746				/* We can only log the problem. */
747				pjdlog_errno(LOG_ERR,
748				    "Waiting for worker process (pid=%u) failed",
749				    (unsigned int)res->hr_workerpid);
750			} else {
751				child_exit_log(res->hr_workerpid, status);
752			}
753			child_cleanup(res);
754		} else if (res->hr_remotein != NULL) {
755			char oaddr[256];
756
757			proto_remote_address(res->hr_remotein, oaddr,
758			    sizeof(oaddr));
759			pjdlog_debug(1,
760			    "Canceling half-open connection from %s on connection from %s.",
761			    oaddr, raddr);
762			proto_close(res->hr_remotein);
763			res->hr_remotein = NULL;
764		}
765	}
766
767	/*
768	 * Checks and cleanups are done.
769	 */
770
771	if (token == NULL) {
772		arc4random_buf(res->hr_token, sizeof(res->hr_token));
773		nvout = nv_alloc();
774		nv_add_uint8_array(nvout, res->hr_token,
775		    sizeof(res->hr_token), "token");
776		if (nv_error(nvout) != 0) {
777			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
778			    "Unable to prepare return header for %s", raddr);
779			nv_add_stringf(nverr, "errmsg",
780			    "Remote node was unable to prepare return header: %s.",
781			    strerror(nv_error(nvout)));
782			goto fail;
783		}
784		if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
785			int error = errno;
786
787			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
788			    raddr);
789			nv_add_stringf(nverr, "errmsg",
790			    "Remote node was unable to send response: %s.",
791			    strerror(error));
792			goto fail;
793		}
794		res->hr_remotein = conn;
795		pjdlog_debug(1, "Incoming connection from %s configured.",
796		    raddr);
797	} else {
798		res->hr_remoteout = conn;
799		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
800		hastd_secondary(res, nvin);
801	}
802	nv_free(nvin);
803	nv_free(nvout);
804	nv_free(nverr);
805	pjdlog_prefix_set("%s", "");
806	return;
807fail:
808	if (nv_error(nverr) != 0) {
809		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
810		    "Unable to prepare error header for %s", raddr);
811		goto close;
812	}
813	if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
814		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
815		goto close;
816	}
817close:
818	if (nvin != NULL)
819		nv_free(nvin);
820	if (nvout != NULL)
821		nv_free(nvout);
822	if (nverr != NULL)
823		nv_free(nverr);
824	proto_close(conn);
825	pjdlog_prefix_set("%s", "");
826}
827
828static void
829connection_migrate(struct hast_resource *res)
830{
831	struct proto_conn *conn;
832	int16_t val = 0;
833
834	if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) {
835		pjdlog_errno(LOG_WARNING,
836		    "Unable to receive connection command");
837		return;
838	}
839	if (proto_client(res->hr_remoteaddr, &conn) < 0) {
840		val = errno;
841		pjdlog_errno(LOG_WARNING,
842		    "Unable to create outgoing connection to %s",
843		    res->hr_remoteaddr);
844		goto out;
845	}
846	if (proto_connect(conn, -1) < 0) {
847		val = errno;
848		pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
849		    res->hr_remoteaddr);
850		proto_close(conn);
851		goto out;
852	}
853	val = 0;
854out:
855	if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) {
856		pjdlog_errno(LOG_WARNING,
857		    "Unable to send reply to connection request");
858	}
859	if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0)
860		pjdlog_errno(LOG_WARNING, "Unable to send connection");
861}
862
863static void
864main_loop(void)
865{
866	struct hast_resource *res;
867	struct timeval seltimeout;
868	struct timespec sigtimeout;
869	int fd, maxfd, ret, signo;
870	sigset_t mask;
871	fd_set rfds;
872
873	seltimeout.tv_sec = REPORT_INTERVAL;
874	seltimeout.tv_usec = 0;
875	sigtimeout.tv_sec = 0;
876	sigtimeout.tv_nsec = 0;
877
878	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
879	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
880	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
881	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
882	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
883
884	pjdlog_info("Started successfully, running protocol version %d.",
885	    HAST_PROTO_VERSION);
886
887	for (;;) {
888		while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
889			switch (signo) {
890			case SIGINT:
891			case SIGTERM:
892				sigexit_received = true;
893				terminate_workers();
894				proto_close(cfg->hc_controlconn);
895				exit(EX_OK);
896				break;
897			case SIGCHLD:
898				child_exit();
899				break;
900			case SIGHUP:
901				hastd_reload();
902				break;
903			default:
904				PJDLOG_ABORT("Unexpected signal (%d).", signo);
905			}
906		}
907
908		/* Setup descriptors for select(2). */
909		FD_ZERO(&rfds);
910		maxfd = fd = proto_descriptor(cfg->hc_controlconn);
911		PJDLOG_ASSERT(fd >= 0);
912		FD_SET(fd, &rfds);
913		fd = proto_descriptor(cfg->hc_listenconn);
914		PJDLOG_ASSERT(fd >= 0);
915		FD_SET(fd, &rfds);
916		maxfd = fd > maxfd ? fd : maxfd;
917		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
918			if (res->hr_event == NULL)
919				continue;
920			PJDLOG_ASSERT(res->hr_conn != NULL);
921			fd = proto_descriptor(res->hr_event);
922			PJDLOG_ASSERT(fd >= 0);
923			FD_SET(fd, &rfds);
924			maxfd = fd > maxfd ? fd : maxfd;
925			if (res->hr_role == HAST_ROLE_PRIMARY) {
926				/* Only primary workers asks for connections. */
927				fd = proto_descriptor(res->hr_conn);
928				PJDLOG_ASSERT(fd >= 0);
929				FD_SET(fd, &rfds);
930				maxfd = fd > maxfd ? fd : maxfd;
931			}
932		}
933
934		PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE);
935		ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout);
936		if (ret == 0)
937			hook_check();
938		else if (ret == -1) {
939			if (errno == EINTR)
940				continue;
941			KEEP_ERRNO((void)pidfile_remove(pfh));
942			pjdlog_exit(EX_OSERR, "select() failed");
943		}
944
945		if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
946			control_handle(cfg);
947		if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
948			listen_accept();
949		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
950			if (res->hr_event == NULL)
951				continue;
952			PJDLOG_ASSERT(res->hr_conn != NULL);
953			if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
954				if (event_recv(res) == 0)
955					continue;
956				/* The worker process exited? */
957				proto_close(res->hr_event);
958				res->hr_event = NULL;
959				proto_close(res->hr_conn);
960				res->hr_conn = NULL;
961				continue;
962			}
963			if (res->hr_role == HAST_ROLE_PRIMARY &&
964			    FD_ISSET(proto_descriptor(res->hr_conn), &rfds)) {
965				connection_migrate(res);
966			}
967		}
968	}
969}
970
/*
 * Signal handler that intentionally does nothing.
 */
static void
dummy_sighandler(int sig __unused)
{

	return;
}
976
977int
978main(int argc, char *argv[])
979{
980	const char *pidfile;
981	pid_t otherpid;
982	bool foreground;
983	int debuglevel;
984	sigset_t mask;
985
986	foreground = false;
987	debuglevel = 0;
988	pidfile = HASTD_PIDFILE;
989
990	for (;;) {
991		int ch;
992
993		ch = getopt(argc, argv, "c:dFhP:");
994		if (ch == -1)
995			break;
996		switch (ch) {
997		case 'c':
998			cfgpath = optarg;
999			break;
1000		case 'd':
1001			debuglevel++;
1002			break;
1003		case 'F':
1004			foreground = true;
1005			break;
1006		case 'P':
1007			pidfile = optarg;
1008			break;
1009		case 'h':
1010		default:
1011			usage();
1012		}
1013	}
1014	argc -= optind;
1015	argv += optind;
1016
1017	pjdlog_init(PJDLOG_MODE_STD);
1018	pjdlog_debug_set(debuglevel);
1019
1020	g_gate_load();
1021
1022	pfh = pidfile_open(pidfile, 0600, &otherpid);
1023	if (pfh == NULL) {
1024		if (errno == EEXIST) {
1025			pjdlog_exitx(EX_TEMPFAIL,
1026			    "Another hastd is already running, pid: %jd.",
1027			    (intmax_t)otherpid);
1028		}
1029		/* If we cannot create pidfile from other reasons, only warn. */
1030		pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
1031	}
1032
1033	cfg = yy_config_parse(cfgpath, true);
1034	PJDLOG_ASSERT(cfg != NULL);
1035
1036	/*
1037	 * Restore default actions for interesting signals in case parent
1038	 * process (like init(8)) decided to ignore some of them (like SIGHUP).
1039	 */
1040	PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR);
1041	PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR);
1042	PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR);
1043	/*
1044	 * Because SIGCHLD is ignored by default, setup dummy handler for it,
1045	 * so we can mask it.
1046	 */
1047	PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
1048
1049	PJDLOG_VERIFY(sigemptyset(&mask) == 0);
1050	PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
1051	PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
1052	PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
1053	PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
1054	PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1055
1056	/* Listen on control address. */
1057	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
1058		KEEP_ERRNO((void)pidfile_remove(pfh));
1059		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
1060		    cfg->hc_controladdr);
1061	}
1062	/* Listen for remote connections. */
1063	if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
1064		KEEP_ERRNO((void)pidfile_remove(pfh));
1065		pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
1066		    cfg->hc_listenaddr);
1067	}
1068
1069	if (!foreground) {
1070		if (daemon(0, 0) < 0) {
1071			KEEP_ERRNO((void)pidfile_remove(pfh));
1072			pjdlog_exit(EX_OSERR, "Unable to daemonize");
1073		}
1074
1075		/* Start logging to syslog. */
1076		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
1077
1078		/* Write PID to a file. */
1079		if (pidfile_write(pfh) < 0) {
1080			pjdlog_errno(LOG_WARNING,
1081			    "Unable to write PID to a file");
1082		}
1083	}
1084
1085	hook_init();
1086
1087	main_loop();
1088
1089	exit(0);
1090}
1091