hastd.c revision 207372
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Pawel Jakub Dawidek under sponsorship from
6 * the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 207372 2010-04-29 15:42:24Z pjd $");
32
33#include <sys/param.h>
34#include <sys/linker.h>
35#include <sys/module.h>
36#include <sys/wait.h>
37
38#include <assert.h>
39#include <err.h>
40#include <errno.h>
41#include <libutil.h>
42#include <signal.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <sysexits.h>
48#include <unistd.h>
49
50#include <activemap.h>
51#include <pjdlog.h>
52
53#include "control.h"
54#include "hast.h"
55#include "hast_proto.h"
56#include "hastd.h"
57#include "subr.h"
58
59/* Path to configuration file. */
60static const char *cfgpath = HAST_CONFIG;
61/* Hastd configuration. */
62static struct hastd_config *cfg;
63/* Was SIGCHLD signal received? */
64static bool sigchld_received = false;
65/* Was SIGHUP signal received? */
66static bool sighup_received = false;
67/* Was SIGINT or SIGTERM signal received? */
68bool sigexit_received = false;
69/* PID file handle. */
70struct pidfh *pfh;
71
/*
 * Report the accepted command-line options through errx(3), which also
 * terminates the process with the EX_USAGE exit code.
 */
static void
usage(void)
{
	errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]");
}
78
79static void
80sighandler(int sig)
81{
82
83	switch (sig) {
84	case SIGCHLD:
85		sigchld_received = true;
86		break;
87	case SIGHUP:
88		sighup_received = true;
89		break;
90	default:
91		assert(!"invalid condition");
92	}
93}
94
/*
 * Make sure the g_gate kernel module is available, loading geom_gate if
 * modfind(2) does not report it.  Exits with EX_OSERR when the module can
 * be neither found nor loaded, unless the failure was EEXIST.
 */
static void
g_gate_load(void)
{

	/* Already present in the kernel - nothing to do. */
	if (modfind("g_gate") != -1)
		return;

	/* Not present, try loading it and look it up again. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;

	if (errno != EEXIST)
		pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
109
/*
 * Log how a worker process terminated.
 *
 * pid    - PID of the worker, used only for the log message.
 * status - wait status as filled in by wait3(2)/waitpid(2).
 *
 * A zero exit code is logged at debug level 1; death by signal and any
 * other termination are logged as errors.  When the status describes
 * neither a normal exit nor a signal, the exit code is reported as -1.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	}

	exitcode = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
	if (exitcode == 0) {
		pjdlog_debug(1, "Worker process exited gracefully (pid=%u).",
		    pid);
		return;
	}
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
125
126static void
127child_exit(void)
128{
129	struct hast_resource *res;
130	int status;
131	pid_t pid;
132
133	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
134		/* Find resource related to the process that just exited. */
135		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
136			if (pid == res->hr_workerpid)
137				break;
138		}
139		if (res == NULL) {
140			/*
141			 * This can happen when new connection arrives and we
142			 * cancel child responsible for the old one.
143			 */
144			continue;
145		}
146		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
147		    role2str(res->hr_role));
148		child_exit_log(pid, status);
149		proto_close(res->hr_ctrl);
150		res->hr_workerpid = 0;
151		if (res->hr_role == HAST_ROLE_PRIMARY) {
152			/*
153			 * Restart child process if it was killed by signal
154			 * or exited because of temporary problem.
155			 */
156			if (WIFSIGNALED(status) ||
157			    (WIFEXITED(status) &&
158			     WEXITSTATUS(status) == EX_TEMPFAIL)) {
159				sleep(1);
160				pjdlog_info("Restarting worker process.");
161				hastd_primary(res);
162			} else {
163				res->hr_role = HAST_ROLE_INIT;
164				pjdlog_info("Changing resource role back to %s.",
165				    role2str(res->hr_role));
166			}
167		}
168		pjdlog_prefix_set("%s", "");
169	}
170}
171
/*
 * Placeholder for configuration reload, invoked from main_loop() after
 * SIGHUP.  Reloading is not implemented yet (TODO); only a warning is
 * logged.
 */
static void
hastd_reload(void)
{
	pjdlog_warning("Configuration reload is not implemented.");
}
179
180static void
181listen_accept(void)
182{
183	struct hast_resource *res;
184	struct proto_conn *conn;
185	struct nv *nvin, *nvout, *nverr;
186	const char *resname;
187	const unsigned char *token;
188	char laddr[256], raddr[256];
189	size_t size;
190	pid_t pid;
191	int status;
192
193	proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
194	pjdlog_debug(1, "Accepting connection to %s.", laddr);
195
196	if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
197		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
198		return;
199	}
200
201	proto_local_address(conn, laddr, sizeof(laddr));
202	proto_remote_address(conn, raddr, sizeof(raddr));
203	pjdlog_info("Connection from %s to %s.", laddr, raddr);
204
205	/* Error in setting timeout is not critical, but why should it fail? */
206	if (proto_timeout(conn, HAST_TIMEOUT) < 0)
207		pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
208
209	nvin = nvout = nverr = NULL;
210
211	/*
212	 * Before receiving any data see if remote host have access to any
213	 * resource.
214	 */
215	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
216		if (proto_address_match(conn, res->hr_remoteaddr))
217			break;
218	}
219	if (res == NULL) {
220		pjdlog_error("Client %s isn't known.", raddr);
221		goto close;
222	}
223	/* Ok, remote host can access at least one resource. */
224
225	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
226		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
227		    raddr);
228		goto close;
229	}
230
231	resname = nv_get_string(nvin, "resource");
232	if (resname == NULL) {
233		pjdlog_error("No 'resource' field in the header received from %s.",
234		    raddr);
235		goto close;
236	}
237	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
238	token = nv_get_uint8_array(nvin, &size, "token");
239	/*
240	 * NULL token means that this is first conection.
241	 */
242	if (token != NULL && size != sizeof(res->hr_token)) {
243		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
244		    raddr, sizeof(res->hr_token), size);
245		goto close;
246	}
247
248	/*
249	 * From now on we want to send errors to the remote node.
250	 */
251	nverr = nv_alloc();
252
253	/* Find resource related to this connection. */
254	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
255		if (strcmp(resname, res->hr_name) == 0)
256			break;
257	}
258	/* Have we found the resource? */
259	if (res == NULL) {
260		pjdlog_error("No resource '%s' as requested by %s.",
261		    resname, raddr);
262		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
263		goto fail;
264	}
265
266	/* Now that we know resource name setup log prefix. */
267	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
268
269	/* Does the remote host have access to this resource? */
270	if (!proto_address_match(conn, res->hr_remoteaddr)) {
271		pjdlog_error("Client %s has no access to the resource.", raddr);
272		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
273		goto fail;
274	}
275	/* Is the resource marked as secondary? */
276	if (res->hr_role != HAST_ROLE_SECONDARY) {
277		pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
278		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
279		    raddr);
280		nv_add_stringf(nverr, "errmsg",
281		    "Remote node acts as %s for the resource and not as %s.",
282		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
283		goto fail;
284	}
285	/* Does token (if exists) match? */
286	if (token != NULL && memcmp(token, res->hr_token,
287	    sizeof(res->hr_token)) != 0) {
288		pjdlog_error("Token received from %s doesn't match.", raddr);
289		nv_add_stringf(nverr, "errmsg", "Toke doesn't match.");
290		goto fail;
291	}
292	/*
293	 * If there is no token, but we have half-open connection
294	 * (only remotein) or full connection (worker process is running)
295	 * we have to cancel those and accept the new connection.
296	 */
297	if (token == NULL) {
298		assert(res->hr_remoteout == NULL);
299		pjdlog_debug(1, "Initial connection from %s.", raddr);
300		if (res->hr_workerpid != 0) {
301			assert(res->hr_remotein == NULL);
302			pjdlog_debug(1,
303			    "Worker process exists (pid=%u), stopping it.",
304			    (unsigned int)res->hr_workerpid);
305			/* Stop child process. */
306			if (kill(res->hr_workerpid, SIGINT) < 0) {
307				pjdlog_errno(LOG_ERR,
308				    "Unable to stop worker process (pid=%u)",
309				    (unsigned int)res->hr_workerpid);
310				/*
311				 * Other than logging the problem we
312				 * ignore it - nothing smart to do.
313				 */
314			}
315			/* Wait for it to exit. */
316			else if ((pid = waitpid(res->hr_workerpid,
317			    &status, 0)) != res->hr_workerpid) {
318				/* We can only log the problem. */
319				pjdlog_errno(LOG_ERR,
320				    "Waiting for worker process (pid=%u) failed",
321				    (unsigned int)res->hr_workerpid);
322			} else {
323				child_exit_log(res->hr_workerpid, status);
324			}
325			res->hr_workerpid = 0;
326		} else if (res->hr_remotein != NULL) {
327			char oaddr[256];
328
329			proto_remote_address(conn, oaddr, sizeof(oaddr));
330			pjdlog_debug(1,
331			    "Canceling half-open connection from %s on connection from %s.",
332			    oaddr, raddr);
333			proto_close(res->hr_remotein);
334			res->hr_remotein = NULL;
335		}
336	}
337
338	/*
339	 * Checks and cleanups are done.
340	 */
341
342	if (token == NULL) {
343		arc4random_buf(res->hr_token, sizeof(res->hr_token));
344		nvout = nv_alloc();
345		nv_add_uint8_array(nvout, res->hr_token,
346		    sizeof(res->hr_token), "token");
347		if (nv_error(nvout) != 0) {
348			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
349			    "Unable to prepare return header for %s", raddr);
350			nv_add_stringf(nverr, "errmsg",
351			    "Remote node was unable to prepare return header: %s.",
352			    strerror(nv_error(nvout)));
353			goto fail;
354		}
355		if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
356			int error = errno;
357
358			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
359			    raddr);
360			nv_add_stringf(nverr, "errmsg",
361			    "Remote node was unable to send response: %s.",
362			    strerror(error));
363			goto fail;
364		}
365		res->hr_remotein = conn;
366		pjdlog_debug(1, "Incoming connection from %s configured.",
367		    raddr);
368	} else {
369		res->hr_remoteout = conn;
370		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
371		hastd_secondary(res, nvin);
372	}
373	nv_free(nvin);
374	nv_free(nvout);
375	nv_free(nverr);
376	pjdlog_prefix_set("%s", "");
377	return;
378fail:
379	if (nv_error(nverr) != 0) {
380		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
381		    "Unable to prepare error header for %s", raddr);
382		goto close;
383	}
384	if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
385		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
386		goto close;
387	}
388close:
389	if (nvin != NULL)
390		nv_free(nvin);
391	if (nvout != NULL)
392		nv_free(nvout);
393	if (nverr != NULL)
394		nv_free(nverr);
395	proto_close(conn);
396	pjdlog_prefix_set("%s", "");
397}
398
399static void
400main_loop(void)
401{
402	fd_set rfds, wfds;
403	int fd, maxfd, ret;
404
405	for (;;) {
406		if (sigchld_received) {
407			sigchld_received = false;
408			child_exit();
409		}
410		if (sighup_received) {
411			sighup_received = false;
412			hastd_reload();
413		}
414
415		maxfd = 0;
416		FD_ZERO(&rfds);
417		FD_ZERO(&wfds);
418
419		/* Setup descriptors for select(2). */
420#define	SETUP_FD(conn)	do {						\
421	fd = proto_descriptor(conn);					\
422	if (fd >= 0) {							\
423		maxfd = fd > maxfd ? fd : maxfd;			\
424		FD_SET(fd, &rfds);					\
425		FD_SET(fd, &wfds);					\
426	}								\
427} while (0)
428		SETUP_FD(cfg->hc_controlconn);
429		SETUP_FD(cfg->hc_listenconn);
430#undef	SETUP_FD
431
432		ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
433		if (ret == -1) {
434			if (errno == EINTR)
435				continue;
436			KEEP_ERRNO((void)pidfile_remove(pfh));
437			pjdlog_exit(EX_OSERR, "select() failed");
438		}
439
440#define	ISSET_FD(conn)	\
441	(FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds))
442		if (ISSET_FD(cfg->hc_controlconn))
443			control_handle(cfg);
444		if (ISSET_FD(cfg->hc_listenconn))
445			listen_accept();
446#undef	ISSET_FD
447	}
448}
449
450int
451main(int argc, char *argv[])
452{
453	const char *pidfile;
454	pid_t otherpid;
455	bool foreground;
456	int debuglevel;
457
458	g_gate_load();
459
460	foreground = false;
461	debuglevel = 0;
462	pidfile = HASTD_PIDFILE;
463
464	for (;;) {
465		int ch;
466
467		ch = getopt(argc, argv, "c:dFhP:");
468		if (ch == -1)
469			break;
470		switch (ch) {
471		case 'c':
472			cfgpath = optarg;
473			break;
474		case 'd':
475			debuglevel++;
476			break;
477		case 'F':
478			foreground = true;
479			break;
480		case 'P':
481			pidfile = optarg;
482			break;
483		case 'h':
484		default:
485			usage();
486		}
487	}
488	argc -= optind;
489	argv += optind;
490
491	pjdlog_debug_set(debuglevel);
492
493	pfh = pidfile_open(pidfile, 0600, &otherpid);
494	if (pfh == NULL) {
495		if (errno == EEXIST) {
496			pjdlog_exitx(EX_TEMPFAIL,
497			    "Another hastd is already running, pid: %jd.",
498			    (intmax_t)otherpid);
499		}
500		/* If we cannot create pidfile from other reasons, only warn. */
501		pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile");
502	}
503
504	cfg = yy_config_parse(cfgpath);
505	assert(cfg != NULL);
506
507	signal(SIGHUP, sighandler);
508	signal(SIGCHLD, sighandler);
509
510	/* Listen on control address. */
511	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
512		KEEP_ERRNO((void)pidfile_remove(pfh));
513		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
514		    cfg->hc_controladdr);
515	}
516	/* Listen for remote connections. */
517	if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
518		KEEP_ERRNO((void)pidfile_remove(pfh));
519		pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
520		    cfg->hc_listenaddr);
521	}
522
523	if (!foreground) {
524		if (daemon(0, 0) < 0) {
525			KEEP_ERRNO((void)pidfile_remove(pfh));
526			pjdlog_exit(EX_OSERR, "Unable to daemonize");
527		}
528
529		/* Start logging to syslog. */
530		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
531
532		/* Write PID to a file. */
533		if (pidfile_write(pfh) < 0) {
534			pjdlog_errno(LOG_WARNING,
535			    "Unable to write PID to a file");
536		}
537	}
538
539	main_loop();
540
541	exit(0);
542}
543