Deleted Added
full compact
hastd.c (207348) hastd.c (207371)
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Pawel Jakub Dawidek under sponsorship from
6 * the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Pawel Jakub Dawidek under sponsorship from
6 * the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 207348 2010-04-28 22:41:06Z pjd $");
31__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 207371 2010-04-29 15:36:32Z pjd $");
32
33#include <sys/param.h>
34#include <sys/linker.h>
35#include <sys/module.h>
36#include <sys/wait.h>
37
38#include <assert.h>
39#include <err.h>
40#include <errno.h>
41#include <libutil.h>
42#include <signal.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <sysexits.h>
48#include <unistd.h>
49
50#include <activemap.h>
51#include <pjdlog.h>
52
53#include "control.h"
54#include "hast.h"
55#include "hast_proto.h"
56#include "hastd.h"
57#include "subr.h"
58
59/* Path to configuration file. */
60static const char *cfgpath = HAST_CONFIG;
61/* Hastd configuration. */
62static struct hastd_config *cfg;
63/* Was SIGCHLD signal received? */
64static bool sigchld_received = false;
65/* Was SIGHUP signal received? */
66static bool sighup_received = false;
67/* Was SIGINT or SIGTERM signal received? */
68bool sigexit_received = false;
69/* PID file handle. */
70struct pidfh *pfh;
71
/*
 * Print the command-line synopsis through errx(3) and terminate the
 * process with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
78
79static void
80sighandler(int sig)
81{
82
83 switch (sig) {
84 case SIGCHLD:
85 sigchld_received = true;
86 break;
87 case SIGHUP:
88 sighup_received = true;
89 break;
90 default:
91 assert(!"invalid condition");
92 }
93}
94
/*
 * Make sure the g_gate module is available in the kernel, loading
 * geom_gate when it is missing.  Exits with EX_OSERR when that fails,
 * unless errno is EEXIST.
 */
static void
g_gate_load(void)
{

	/* Already present in kernel - nothing to do. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present, try loading it and look it up once more. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated, every other error is fatal. */
	if (errno != EEXIST)
		pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
109
110static void
111child_exit(void)
112{
113 struct hast_resource *res;
114 int status;
115 pid_t pid;
116
117 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
118 /* Find resource related to the process that just exited. */
119 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
120 if (pid == res->hr_workerpid)
121 break;
122 }
123 if (res == NULL) {
124 /*
125 * This can happen when new connection arrives and we
126 * cancel child responsible for the old one.
127 */
128 continue;
129 }
130 pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
131 role2str(res->hr_role));
132 if (WEXITSTATUS(status) == 0) {
133 pjdlog_debug(1,
134 "Worker process exited gracefully (pid=%u).",
135 (unsigned int)pid);
136 } else {
137 pjdlog_error("Worker process failed (pid=%u, status=%d).",
138 (unsigned int)pid, WEXITSTATUS(status));
139 }
140 proto_close(res->hr_ctrl);
141 res->hr_workerpid = 0;
142 if (res->hr_role == HAST_ROLE_PRIMARY) {
143 if (WEXITSTATUS(status) == EX_TEMPFAIL) {
144 sleep(1);
145 pjdlog_info("Restarting worker process.");
146 hastd_primary(res);
147 } else {
148 res->hr_role = HAST_ROLE_INIT;
149 pjdlog_info("Changing resource role back to %s.",
150 role2str(res->hr_role));
151 }
152 }
153 pjdlog_prefix_set("%s", "");
154 }
155}
156
/*
 * Handle a request to reload the configuration (main_loop() calls this
 * after SIGHUP).  Reloading is not implemented yet, so only a warning is
 * logged.
 */
static void
hastd_reload(void)
{

	/* TODO: actually re-read the configuration. */
	pjdlog_warning("Configuration reload is not implemented.");
}
164
165static void
166listen_accept(void)
167{
168 struct hast_resource *res;
169 struct proto_conn *conn;
170 struct nv *nvin, *nvout, *nverr;
171 const char *resname;
172 const unsigned char *token;
173 char laddr[256], raddr[256];
174 size_t size;
175 pid_t pid;
176 int status;
177
178 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
179 pjdlog_debug(1, "Accepting connection to %s.", laddr);
180
181 if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
182 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
183 return;
184 }
185
186 proto_local_address(conn, laddr, sizeof(laddr));
187 proto_remote_address(conn, raddr, sizeof(raddr));
188 pjdlog_info("Connection from %s to %s.", laddr, raddr);
189
32
33#include <sys/param.h>
34#include <sys/linker.h>
35#include <sys/module.h>
36#include <sys/wait.h>
37
38#include <assert.h>
39#include <err.h>
40#include <errno.h>
41#include <libutil.h>
42#include <signal.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <sysexits.h>
48#include <unistd.h>
49
50#include <activemap.h>
51#include <pjdlog.h>
52
53#include "control.h"
54#include "hast.h"
55#include "hast_proto.h"
56#include "hastd.h"
57#include "subr.h"
58
59/* Path to configuration file. */
60static const char *cfgpath = HAST_CONFIG;
61/* Hastd configuration. */
62static struct hastd_config *cfg;
63/* Was SIGCHLD signal received? */
64static bool sigchld_received = false;
65/* Was SIGHUP signal received? */
66static bool sighup_received = false;
67/* Was SIGINT or SIGTERM signal received? */
68bool sigexit_received = false;
69/* PID file handle. */
70struct pidfh *pfh;
71
/*
 * Print the command-line synopsis through errx(3) and terminate the
 * process with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
78
79static void
80sighandler(int sig)
81{
82
83 switch (sig) {
84 case SIGCHLD:
85 sigchld_received = true;
86 break;
87 case SIGHUP:
88 sighup_received = true;
89 break;
90 default:
91 assert(!"invalid condition");
92 }
93}
94
/*
 * Make sure the g_gate module is available in the kernel, loading
 * geom_gate when it is missing.  Exits with EX_OSERR when that fails,
 * unless errno is EEXIST.
 */
static void
g_gate_load(void)
{

	/* Already present in kernel - nothing to do. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present, try loading it and look it up once more. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated, every other error is fatal. */
	if (errno != EEXIST)
		pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
109
110static void
111child_exit(void)
112{
113 struct hast_resource *res;
114 int status;
115 pid_t pid;
116
117 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
118 /* Find resource related to the process that just exited. */
119 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
120 if (pid == res->hr_workerpid)
121 break;
122 }
123 if (res == NULL) {
124 /*
125 * This can happen when new connection arrives and we
126 * cancel child responsible for the old one.
127 */
128 continue;
129 }
130 pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
131 role2str(res->hr_role));
132 if (WEXITSTATUS(status) == 0) {
133 pjdlog_debug(1,
134 "Worker process exited gracefully (pid=%u).",
135 (unsigned int)pid);
136 } else {
137 pjdlog_error("Worker process failed (pid=%u, status=%d).",
138 (unsigned int)pid, WEXITSTATUS(status));
139 }
140 proto_close(res->hr_ctrl);
141 res->hr_workerpid = 0;
142 if (res->hr_role == HAST_ROLE_PRIMARY) {
143 if (WEXITSTATUS(status) == EX_TEMPFAIL) {
144 sleep(1);
145 pjdlog_info("Restarting worker process.");
146 hastd_primary(res);
147 } else {
148 res->hr_role = HAST_ROLE_INIT;
149 pjdlog_info("Changing resource role back to %s.",
150 role2str(res->hr_role));
151 }
152 }
153 pjdlog_prefix_set("%s", "");
154 }
155}
156
/*
 * Handle a request to reload the configuration (main_loop() calls this
 * after SIGHUP).  Reloading is not implemented yet, so only a warning is
 * logged.
 */
static void
hastd_reload(void)
{

	/* TODO: actually re-read the configuration. */
	pjdlog_warning("Configuration reload is not implemented.");
}
164
165static void
166listen_accept(void)
167{
168 struct hast_resource *res;
169 struct proto_conn *conn;
170 struct nv *nvin, *nvout, *nverr;
171 const char *resname;
172 const unsigned char *token;
173 char laddr[256], raddr[256];
174 size_t size;
175 pid_t pid;
176 int status;
177
178 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
179 pjdlog_debug(1, "Accepting connection to %s.", laddr);
180
181 if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
182 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
183 return;
184 }
185
186 proto_local_address(conn, laddr, sizeof(laddr));
187 proto_remote_address(conn, raddr, sizeof(raddr));
188 pjdlog_info("Connection from %s to %s.", laddr, raddr);
189
190 /* Error in setting timeout is not critical, but why should it fail? */
191 if (proto_timeout(conn, HAST_TIMEOUT) < 0)
192 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
193
190 nvin = nvout = nverr = NULL;
191
192 /*
193 * Before receiving any data see if remote host has access to any
194 * resource.
195 */
196 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
197 if (proto_address_match(conn, res->hr_remoteaddr))
198 break;
199 }
200 if (res == NULL) {
201 pjdlog_error("Client %s isn't known.", raddr);
202 goto close;
203 }
204 /* Ok, remote host can access at least one resource. */
205
206 if (hast_proto_recv_hdr(conn, &nvin) < 0) {
207 pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
208 raddr);
209 goto close;
210 }
211
212 resname = nv_get_string(nvin, "resource");
213 if (resname == NULL) {
214 pjdlog_error("No 'resource' field in the header received from %s.",
215 raddr);
216 goto close;
217 }
218 pjdlog_debug(2, "%s: resource=%s", raddr, resname);
219 token = nv_get_uint8_array(nvin, &size, "token");
220 /*
221 * NULL token means that this is the first connection.
222 */
223 if (token != NULL && size != sizeof(res->hr_token)) {
224 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
225 raddr, sizeof(res->hr_token), size);
226 goto close;
227 }
228
229 /*
230 * From now on we want to send errors to the remote node.
231 */
232 nverr = nv_alloc();
233
234 /* Find resource related to this connection. */
235 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
236 if (strcmp(resname, res->hr_name) == 0)
237 break;
238 }
239 /* Have we found the resource? */
240 if (res == NULL) {
241 pjdlog_error("No resource '%s' as requested by %s.",
242 resname, raddr);
243 nv_add_stringf(nverr, "errmsg", "Resource not configured.");
244 goto fail;
245 }
246
247 /* Now that we know resource name setup log prefix. */
248 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
249
250 /* Does the remote host have access to this resource? */
251 if (!proto_address_match(conn, res->hr_remoteaddr)) {
252 pjdlog_error("Client %s has no access to the resource.", raddr);
253 nv_add_stringf(nverr, "errmsg", "No access to the resource.");
254 goto fail;
255 }
256 /* Is the resource marked as secondary? */
257 if (res->hr_role != HAST_ROLE_SECONDARY) {
258 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
259 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
260 raddr);
261 nv_add_stringf(nverr, "errmsg",
262 "Remote node acts as %s for the resource and not as %s.",
263 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
264 goto fail;
265 }
266 /* Does token (if exists) match? */
267 if (token != NULL && memcmp(token, res->hr_token,
268 sizeof(res->hr_token)) != 0) {
269 pjdlog_error("Token received from %s doesn't match.", raddr);
270 nv_add_stringf(nverr, "errmsg", "Toke doesn't match.");
271 goto fail;
272 }
273 /*
274 * If there is no token, but we have half-open connection
275 * (only remotein) or full connection (worker process is running)
276 * we have to cancel those and accept the new connection.
277 */
278 if (token == NULL) {
279 assert(res->hr_remoteout == NULL);
280 pjdlog_debug(1, "Initial connection from %s.", raddr);
281 if (res->hr_workerpid != 0) {
282 assert(res->hr_remotein == NULL);
283 pjdlog_debug(1,
284 "Worker process exists (pid=%u), stopping it.",
285 (unsigned int)res->hr_workerpid);
286 /* Stop child process. */
287 if (kill(res->hr_workerpid, SIGINT) < 0) {
288 pjdlog_errno(LOG_ERR,
289 "Unable to stop worker process (pid=%u)",
290 (unsigned int)res->hr_workerpid);
291 /*
292 * Other than logging the problem we
293 * ignore it - nothing smart to do.
294 */
295 }
296 /* Wait for it to exit. */
297 else if ((pid = waitpid(res->hr_workerpid,
298 &status, 0)) != res->hr_workerpid) {
299 pjdlog_errno(LOG_ERR,
300 "Waiting for worker process (pid=%u) failed",
301 (unsigned int)res->hr_workerpid);
302 /* See above. */
303 } else if (WEXITSTATUS(status) != 0) {
304 pjdlog_error("Worker process (pid=%u) exited ungracefully: status=%d.",
305 (unsigned int)res->hr_workerpid,
306 WEXITSTATUS(status));
307 /* See above. */
308 } else {
309 pjdlog_debug(1,
310 "Worker process (pid=%u) exited gracefully.",
311 (unsigned int)res->hr_workerpid);
312 }
313 res->hr_workerpid = 0;
314 } else if (res->hr_remotein != NULL) {
315 char oaddr[256];
316
317 proto_remote_address(conn, oaddr, sizeof(oaddr));
318 pjdlog_debug(1,
319 "Canceling half-open connection from %s on connection from %s.",
320 oaddr, raddr);
321 proto_close(res->hr_remotein);
322 res->hr_remotein = NULL;
323 }
324 }
325
326 /*
327 * Checks and cleanups are done.
328 */
329
330 if (token == NULL) {
331 arc4random_buf(res->hr_token, sizeof(res->hr_token));
332 nvout = nv_alloc();
333 nv_add_uint8_array(nvout, res->hr_token,
334 sizeof(res->hr_token), "token");
335 if (nv_error(nvout) != 0) {
336 pjdlog_common(LOG_ERR, 0, nv_error(nvout),
337 "Unable to prepare return header for %s", raddr);
338 nv_add_stringf(nverr, "errmsg",
339 "Remote node was unable to prepare return header: %s.",
340 strerror(nv_error(nvout)));
341 goto fail;
342 }
343 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
344 int error = errno;
345
346 pjdlog_errno(LOG_ERR, "Unable to send response to %s",
347 raddr);
348 nv_add_stringf(nverr, "errmsg",
349 "Remote node was unable to send response: %s.",
350 strerror(error));
351 goto fail;
352 }
353 res->hr_remotein = conn;
354 pjdlog_debug(1, "Incoming connection from %s configured.",
355 raddr);
356 } else {
357 res->hr_remoteout = conn;
358 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
359 hastd_secondary(res, nvin);
360 }
361 nv_free(nvin);
362 nv_free(nvout);
363 nv_free(nverr);
364 pjdlog_prefix_set("%s", "");
365 return;
366fail:
367 if (nv_error(nverr) != 0) {
368 pjdlog_common(LOG_ERR, 0, nv_error(nverr),
369 "Unable to prepare error header for %s", raddr);
370 goto close;
371 }
372 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
373 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
374 goto close;
375 }
376close:
377 if (nvin != NULL)
378 nv_free(nvin);
379 if (nvout != NULL)
380 nv_free(nvout);
381 if (nverr != NULL)
382 nv_free(nverr);
383 proto_close(conn);
384 pjdlog_prefix_set("%s", "");
385}
386
387static void
388main_loop(void)
389{
390 fd_set rfds, wfds;
391 int fd, maxfd, ret;
392
393 for (;;) {
394 if (sigchld_received) {
395 sigchld_received = false;
396 child_exit();
397 }
398 if (sighup_received) {
399 sighup_received = false;
400 hastd_reload();
401 }
402
403 maxfd = 0;
404 FD_ZERO(&rfds);
405 FD_ZERO(&wfds);
406
407 /* Setup descriptors for select(2). */
408#define SETUP_FD(conn) do { \
409 fd = proto_descriptor(conn); \
410 if (fd >= 0) { \
411 maxfd = fd > maxfd ? fd : maxfd; \
412 FD_SET(fd, &rfds); \
413 FD_SET(fd, &wfds); \
414 } \
415} while (0)
416 SETUP_FD(cfg->hc_controlconn);
417 SETUP_FD(cfg->hc_listenconn);
418#undef SETUP_FD
419
420 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
421 if (ret == -1) {
422 if (errno == EINTR)
423 continue;
424 KEEP_ERRNO((void)pidfile_remove(pfh));
425 pjdlog_exit(EX_OSERR, "select() failed");
426 }
427
428#define ISSET_FD(conn) \
429 (FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds))
430 if (ISSET_FD(cfg->hc_controlconn))
431 control_handle(cfg);
432 if (ISSET_FD(cfg->hc_listenconn))
433 listen_accept();
434#undef ISSET_FD
435 }
436}
437
438int
439main(int argc, char *argv[])
440{
441 const char *pidfile;
442 pid_t otherpid;
443 bool foreground;
444 int debuglevel;
445
446 g_gate_load();
447
448 foreground = false;
449 debuglevel = 0;
450 pidfile = HASTD_PIDFILE;
451
452 for (;;) {
453 int ch;
454
455 ch = getopt(argc, argv, "c:dFhP:");
456 if (ch == -1)
457 break;
458 switch (ch) {
459 case 'c':
460 cfgpath = optarg;
461 break;
462 case 'd':
463 debuglevel++;
464 break;
465 case 'F':
466 foreground = true;
467 break;
468 case 'P':
469 pidfile = optarg;
470 break;
471 case 'h':
472 default:
473 usage();
474 }
475 }
476 argc -= optind;
477 argv += optind;
478
479 pjdlog_debug_set(debuglevel);
480
481 pfh = pidfile_open(pidfile, 0600, &otherpid);
482 if (pfh == NULL) {
483 if (errno == EEXIST) {
484 pjdlog_exitx(EX_TEMPFAIL,
485 "Another hastd is already running, pid: %jd.",
486 (intmax_t)otherpid);
487 }
488 /* If we cannot create pidfile from other reasons, only warn. */
489 pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile");
490 }
491
492 cfg = yy_config_parse(cfgpath);
493 assert(cfg != NULL);
494
495 signal(SIGHUP, sighandler);
496 signal(SIGCHLD, sighandler);
497
498 /* Listen on control address. */
499 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
500 KEEP_ERRNO((void)pidfile_remove(pfh));
501 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
502 cfg->hc_controladdr);
503 }
504 /* Listen for remote connections. */
505 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
506 KEEP_ERRNO((void)pidfile_remove(pfh));
507 pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
508 cfg->hc_listenaddr);
509 }
510
511 if (!foreground) {
512 if (daemon(0, 0) < 0) {
513 KEEP_ERRNO((void)pidfile_remove(pfh));
514 pjdlog_exit(EX_OSERR, "Unable to daemonize");
515 }
516
517 /* Start logging to syslog. */
518 pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
519
520 /* Write PID to a file. */
521 if (pidfile_write(pfh) < 0) {
522 pjdlog_errno(LOG_WARNING,
523 "Unable to write PID to a file");
524 }
525 }
526
527 main_loop();
528
529 exit(0);
530}
194 nvin = nvout = nverr = NULL;
195
196 /*
197 * Before receiving any data see if remote host has access to any
198 * resource.
199 */
200 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
201 if (proto_address_match(conn, res->hr_remoteaddr))
202 break;
203 }
204 if (res == NULL) {
205 pjdlog_error("Client %s isn't known.", raddr);
206 goto close;
207 }
208 /* Ok, remote host can access at least one resource. */
209
210 if (hast_proto_recv_hdr(conn, &nvin) < 0) {
211 pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
212 raddr);
213 goto close;
214 }
215
216 resname = nv_get_string(nvin, "resource");
217 if (resname == NULL) {
218 pjdlog_error("No 'resource' field in the header received from %s.",
219 raddr);
220 goto close;
221 }
222 pjdlog_debug(2, "%s: resource=%s", raddr, resname);
223 token = nv_get_uint8_array(nvin, &size, "token");
224 /*
225 * NULL token means that this is the first connection.
226 */
227 if (token != NULL && size != sizeof(res->hr_token)) {
228 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
229 raddr, sizeof(res->hr_token), size);
230 goto close;
231 }
232
233 /*
234 * From now on we want to send errors to the remote node.
235 */
236 nverr = nv_alloc();
237
238 /* Find resource related to this connection. */
239 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
240 if (strcmp(resname, res->hr_name) == 0)
241 break;
242 }
243 /* Have we found the resource? */
244 if (res == NULL) {
245 pjdlog_error("No resource '%s' as requested by %s.",
246 resname, raddr);
247 nv_add_stringf(nverr, "errmsg", "Resource not configured.");
248 goto fail;
249 }
250
251 /* Now that we know resource name setup log prefix. */
252 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
253
254 /* Does the remote host have access to this resource? */
255 if (!proto_address_match(conn, res->hr_remoteaddr)) {
256 pjdlog_error("Client %s has no access to the resource.", raddr);
257 nv_add_stringf(nverr, "errmsg", "No access to the resource.");
258 goto fail;
259 }
260 /* Is the resource marked as secondary? */
261 if (res->hr_role != HAST_ROLE_SECONDARY) {
262 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
263 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
264 raddr);
265 nv_add_stringf(nverr, "errmsg",
266 "Remote node acts as %s for the resource and not as %s.",
267 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
268 goto fail;
269 }
270 /* Does token (if exists) match? */
271 if (token != NULL && memcmp(token, res->hr_token,
272 sizeof(res->hr_token)) != 0) {
273 pjdlog_error("Token received from %s doesn't match.", raddr);
274 nv_add_stringf(nverr, "errmsg", "Toke doesn't match.");
275 goto fail;
276 }
277 /*
278 * If there is no token, but we have half-open connection
279 * (only remotein) or full connection (worker process is running)
280 * we have to cancel those and accept the new connection.
281 */
282 if (token == NULL) {
283 assert(res->hr_remoteout == NULL);
284 pjdlog_debug(1, "Initial connection from %s.", raddr);
285 if (res->hr_workerpid != 0) {
286 assert(res->hr_remotein == NULL);
287 pjdlog_debug(1,
288 "Worker process exists (pid=%u), stopping it.",
289 (unsigned int)res->hr_workerpid);
290 /* Stop child process. */
291 if (kill(res->hr_workerpid, SIGINT) < 0) {
292 pjdlog_errno(LOG_ERR,
293 "Unable to stop worker process (pid=%u)",
294 (unsigned int)res->hr_workerpid);
295 /*
296 * Other than logging the problem we
297 * ignore it - nothing smart to do.
298 */
299 }
300 /* Wait for it to exit. */
301 else if ((pid = waitpid(res->hr_workerpid,
302 &status, 0)) != res->hr_workerpid) {
303 pjdlog_errno(LOG_ERR,
304 "Waiting for worker process (pid=%u) failed",
305 (unsigned int)res->hr_workerpid);
306 /* See above. */
307 } else if (WEXITSTATUS(status) != 0) {
308 pjdlog_error("Worker process (pid=%u) exited ungracefully: status=%d.",
309 (unsigned int)res->hr_workerpid,
310 WEXITSTATUS(status));
311 /* See above. */
312 } else {
313 pjdlog_debug(1,
314 "Worker process (pid=%u) exited gracefully.",
315 (unsigned int)res->hr_workerpid);
316 }
317 res->hr_workerpid = 0;
318 } else if (res->hr_remotein != NULL) {
319 char oaddr[256];
320
321 proto_remote_address(conn, oaddr, sizeof(oaddr));
322 pjdlog_debug(1,
323 "Canceling half-open connection from %s on connection from %s.",
324 oaddr, raddr);
325 proto_close(res->hr_remotein);
326 res->hr_remotein = NULL;
327 }
328 }
329
330 /*
331 * Checks and cleanups are done.
332 */
333
334 if (token == NULL) {
335 arc4random_buf(res->hr_token, sizeof(res->hr_token));
336 nvout = nv_alloc();
337 nv_add_uint8_array(nvout, res->hr_token,
338 sizeof(res->hr_token), "token");
339 if (nv_error(nvout) != 0) {
340 pjdlog_common(LOG_ERR, 0, nv_error(nvout),
341 "Unable to prepare return header for %s", raddr);
342 nv_add_stringf(nverr, "errmsg",
343 "Remote node was unable to prepare return header: %s.",
344 strerror(nv_error(nvout)));
345 goto fail;
346 }
347 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
348 int error = errno;
349
350 pjdlog_errno(LOG_ERR, "Unable to send response to %s",
351 raddr);
352 nv_add_stringf(nverr, "errmsg",
353 "Remote node was unable to send response: %s.",
354 strerror(error));
355 goto fail;
356 }
357 res->hr_remotein = conn;
358 pjdlog_debug(1, "Incoming connection from %s configured.",
359 raddr);
360 } else {
361 res->hr_remoteout = conn;
362 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
363 hastd_secondary(res, nvin);
364 }
365 nv_free(nvin);
366 nv_free(nvout);
367 nv_free(nverr);
368 pjdlog_prefix_set("%s", "");
369 return;
370fail:
371 if (nv_error(nverr) != 0) {
372 pjdlog_common(LOG_ERR, 0, nv_error(nverr),
373 "Unable to prepare error header for %s", raddr);
374 goto close;
375 }
376 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
377 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
378 goto close;
379 }
380close:
381 if (nvin != NULL)
382 nv_free(nvin);
383 if (nvout != NULL)
384 nv_free(nvout);
385 if (nverr != NULL)
386 nv_free(nverr);
387 proto_close(conn);
388 pjdlog_prefix_set("%s", "");
389}
390
391static void
392main_loop(void)
393{
394 fd_set rfds, wfds;
395 int fd, maxfd, ret;
396
397 for (;;) {
398 if (sigchld_received) {
399 sigchld_received = false;
400 child_exit();
401 }
402 if (sighup_received) {
403 sighup_received = false;
404 hastd_reload();
405 }
406
407 maxfd = 0;
408 FD_ZERO(&rfds);
409 FD_ZERO(&wfds);
410
411 /* Setup descriptors for select(2). */
412#define SETUP_FD(conn) do { \
413 fd = proto_descriptor(conn); \
414 if (fd >= 0) { \
415 maxfd = fd > maxfd ? fd : maxfd; \
416 FD_SET(fd, &rfds); \
417 FD_SET(fd, &wfds); \
418 } \
419} while (0)
420 SETUP_FD(cfg->hc_controlconn);
421 SETUP_FD(cfg->hc_listenconn);
422#undef SETUP_FD
423
424 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
425 if (ret == -1) {
426 if (errno == EINTR)
427 continue;
428 KEEP_ERRNO((void)pidfile_remove(pfh));
429 pjdlog_exit(EX_OSERR, "select() failed");
430 }
431
432#define ISSET_FD(conn) \
433 (FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds))
434 if (ISSET_FD(cfg->hc_controlconn))
435 control_handle(cfg);
436 if (ISSET_FD(cfg->hc_listenconn))
437 listen_accept();
438#undef ISSET_FD
439 }
440}
441
442int
443main(int argc, char *argv[])
444{
445 const char *pidfile;
446 pid_t otherpid;
447 bool foreground;
448 int debuglevel;
449
450 g_gate_load();
451
452 foreground = false;
453 debuglevel = 0;
454 pidfile = HASTD_PIDFILE;
455
456 for (;;) {
457 int ch;
458
459 ch = getopt(argc, argv, "c:dFhP:");
460 if (ch == -1)
461 break;
462 switch (ch) {
463 case 'c':
464 cfgpath = optarg;
465 break;
466 case 'd':
467 debuglevel++;
468 break;
469 case 'F':
470 foreground = true;
471 break;
472 case 'P':
473 pidfile = optarg;
474 break;
475 case 'h':
476 default:
477 usage();
478 }
479 }
480 argc -= optind;
481 argv += optind;
482
483 pjdlog_debug_set(debuglevel);
484
485 pfh = pidfile_open(pidfile, 0600, &otherpid);
486 if (pfh == NULL) {
487 if (errno == EEXIST) {
488 pjdlog_exitx(EX_TEMPFAIL,
489 "Another hastd is already running, pid: %jd.",
490 (intmax_t)otherpid);
491 }
492 /* If we cannot create pidfile from other reasons, only warn. */
493 pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile");
494 }
495
496 cfg = yy_config_parse(cfgpath);
497 assert(cfg != NULL);
498
499 signal(SIGHUP, sighandler);
500 signal(SIGCHLD, sighandler);
501
502 /* Listen on control address. */
503 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
504 KEEP_ERRNO((void)pidfile_remove(pfh));
505 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
506 cfg->hc_controladdr);
507 }
508 /* Listen for remote connections. */
509 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
510 KEEP_ERRNO((void)pidfile_remove(pfh));
511 pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
512 cfg->hc_listenaddr);
513 }
514
515 if (!foreground) {
516 if (daemon(0, 0) < 0) {
517 KEEP_ERRNO((void)pidfile_remove(pfh));
518 pjdlog_exit(EX_OSERR, "Unable to daemonize");
519 }
520
521 /* Start logging to syslog. */
522 pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
523
524 /* Write PID to a file. */
525 if (pidfile_write(pfh) < 0) {
526 pjdlog_errno(LOG_WARNING,
527 "Unable to write PID to a file");
528 }
529 }
530
531 main_loop();
532
533 exit(0);
534}