Deleted Added
full compact
hastd.c (210886) hastd.c (211886)
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This software was developed by Pawel Jakub Dawidek under sponsorship from
7 * the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This software was developed by Pawel Jakub Dawidek under sponsorship from
7 * the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 210886 2010-08-05 19:16:31Z pjd $");
32__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 211886 2010-08-27 15:16:52Z pjd $");
33
34#include <sys/param.h>
35#include <sys/linker.h>
36#include <sys/module.h>
37#include <sys/wait.h>
38
39#include <assert.h>
40#include <err.h>
41#include <errno.h>
42#include <libutil.h>
43#include <signal.h>
44#include <stdbool.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <sysexits.h>
49#include <unistd.h>
50
51#include <activemap.h>
52#include <pjdlog.h>
53
54#include "control.h"
55#include "hast.h"
56#include "hast_proto.h"
57#include "hastd.h"
58#include "subr.h"
59
60/* Path to configuration file. */
61const char *cfgpath = HAST_CONFIG;
62/* Hastd configuration. */
63static struct hastd_config *cfg;
64/* Was SIGCHLD signal received? */
33
34#include <sys/param.h>
35#include <sys/linker.h>
36#include <sys/module.h>
37#include <sys/wait.h>
38
39#include <assert.h>
40#include <err.h>
41#include <errno.h>
42#include <libutil.h>
43#include <signal.h>
44#include <stdbool.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <sysexits.h>
49#include <unistd.h>
50
51#include <activemap.h>
52#include <pjdlog.h>
53
54#include "control.h"
55#include "hast.h"
56#include "hast_proto.h"
57#include "hastd.h"
58#include "subr.h"
59
60/* Path to configuration file. */
61const char *cfgpath = HAST_CONFIG;
62/* Hastd configuration. */
63static struct hastd_config *cfg;
64/* Was SIGCHLD signal received? */
65static bool sigchld_received = false;
65bool sigchld_received = false;
66/* Was SIGHUP signal received? */
67bool sighup_received = false;
68/* Was SIGINT or SIGTERM signal received? */
69bool sigexit_received = false;
70/* PID file handle. */
71struct pidfh *pfh;
72
/*
 * Print the command-line synopsis through errx(3), which terminates the
 * process with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
79
80static void
81sighandler(int sig)
82{
83
84 switch (sig) {
85 case SIGCHLD:
86 sigchld_received = true;
87 break;
88 case SIGHUP:
89 sighup_received = true;
90 break;
91 default:
92 assert(!"invalid condition");
93 }
94}
95
/*
 * Make sure the "g_gate" kernel module is available.
 *
 * When modfind(2) does not find it, try to kldload(2) "geom_gate" and look
 * the module up once more.  If that still fails and errno is anything other
 * than EEXIST, the failure is reported through pjdlog_exit() with EX_OSERR.
 */
static void
g_gate_load(void)
{

	/* Module already present in the kernel. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present in kernel, try loading it. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated; every other error is reported. */
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
110
/*
 * Log how a worker process terminated.
 *
 * pid    - process ID of the worker, used only in the log message.
 * status - wait status of the worker as filled in by wait3(2)/waitpid(2).
 *
 * A zero exit code is logged at debug level 1; death by signal and any
 * other termination are logged as errors.  When the status is neither a
 * normal exit nor a signal, -1 is printed as the exit code.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		pjdlog_debug(1, "Worker process exited gracefully (pid=%u).",
		    pid);
		return;
	}
	if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	}
	exitcode = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
126
127static void
128child_exit(void)
129{
130 struct hast_resource *res;
131 int status;
132 pid_t pid;
133
134 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
135 /* Find resource related to the process that just exited. */
136 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
137 if (pid == res->hr_workerpid)
138 break;
139 }
140 if (res == NULL) {
141 /*
142 * This can happen when new connection arrives and we
143 * cancel child responsible for the old one.
144 */
145 continue;
146 }
147 pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
148 role2str(res->hr_role));
149 child_exit_log(pid, status);
150 proto_close(res->hr_ctrl);
151 res->hr_workerpid = 0;
152 if (res->hr_role == HAST_ROLE_PRIMARY) {
153 /*
154 * Restart child process if it was killed by signal
155 * or exited because of temporary problem.
156 */
157 if (WIFSIGNALED(status) ||
158 (WIFEXITED(status) &&
159 WEXITSTATUS(status) == EX_TEMPFAIL)) {
160 sleep(1);
161 pjdlog_info("Restarting worker process.");
162 hastd_primary(res);
163 } else {
164 res->hr_role = HAST_ROLE_INIT;
165 pjdlog_info("Changing resource role back to %s.",
166 role2str(res->hr_role));
167 }
168 }
169 pjdlog_prefix_set("%s", "");
170 }
171}
172
173static bool
174resource_needs_restart(const struct hast_resource *res0,
175 const struct hast_resource *res1)
176{
177
178 assert(strcmp(res0->hr_name, res1->hr_name) == 0);
179
180 if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
181 return (true);
182 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
183 return (true);
184 if (res0->hr_role == HAST_ROLE_INIT ||
185 res0->hr_role == HAST_ROLE_SECONDARY) {
186 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
187 return (true);
188 if (res0->hr_replication != res1->hr_replication)
189 return (true);
190 if (res0->hr_timeout != res1->hr_timeout)
191 return (true);
66/* Was SIGHUP signal received? */
67bool sighup_received = false;
68/* Was SIGINT or SIGTERM signal received? */
69bool sigexit_received = false;
70/* PID file handle. */
71struct pidfh *pfh;
72
/*
 * Print the command-line synopsis through errx(3), which terminates the
 * process with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
79
80static void
81sighandler(int sig)
82{
83
84 switch (sig) {
85 case SIGCHLD:
86 sigchld_received = true;
87 break;
88 case SIGHUP:
89 sighup_received = true;
90 break;
91 default:
92 assert(!"invalid condition");
93 }
94}
95
/*
 * Make sure the "g_gate" kernel module is available.
 *
 * When modfind(2) does not find it, try to kldload(2) "geom_gate" and look
 * the module up once more.  If that still fails and errno is anything other
 * than EEXIST, the failure is reported through pjdlog_exit() with EX_OSERR.
 */
static void
g_gate_load(void)
{

	/* Module already present in the kernel. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present in kernel, try loading it. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	/* EEXIST is tolerated; every other error is reported. */
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
110
/*
 * Log how a worker process terminated.
 *
 * pid    - process ID of the worker, used only in the log message.
 * status - wait status of the worker as filled in by wait3(2)/waitpid(2).
 *
 * A zero exit code is logged at debug level 1; death by signal and any
 * other termination are logged as errors.  When the status is neither a
 * normal exit nor a signal, -1 is printed as the exit code.
 */
static void
child_exit_log(unsigned int pid, int status)
{
	int exitcode;

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		pjdlog_debug(1, "Worker process exited gracefully (pid=%u).",
		    pid);
		return;
	}
	if (WIFSIGNALED(status)) {
		pjdlog_error("Worker process killed (pid=%u, signal=%d).",
		    pid, WTERMSIG(status));
		return;
	}
	exitcode = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
	pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).",
	    pid, exitcode);
}
126
127static void
128child_exit(void)
129{
130 struct hast_resource *res;
131 int status;
132 pid_t pid;
133
134 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
135 /* Find resource related to the process that just exited. */
136 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
137 if (pid == res->hr_workerpid)
138 break;
139 }
140 if (res == NULL) {
141 /*
142 * This can happen when new connection arrives and we
143 * cancel child responsible for the old one.
144 */
145 continue;
146 }
147 pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
148 role2str(res->hr_role));
149 child_exit_log(pid, status);
150 proto_close(res->hr_ctrl);
151 res->hr_workerpid = 0;
152 if (res->hr_role == HAST_ROLE_PRIMARY) {
153 /*
154 * Restart child process if it was killed by signal
155 * or exited because of temporary problem.
156 */
157 if (WIFSIGNALED(status) ||
158 (WIFEXITED(status) &&
159 WEXITSTATUS(status) == EX_TEMPFAIL)) {
160 sleep(1);
161 pjdlog_info("Restarting worker process.");
162 hastd_primary(res);
163 } else {
164 res->hr_role = HAST_ROLE_INIT;
165 pjdlog_info("Changing resource role back to %s.",
166 role2str(res->hr_role));
167 }
168 }
169 pjdlog_prefix_set("%s", "");
170 }
171}
172
173static bool
174resource_needs_restart(const struct hast_resource *res0,
175 const struct hast_resource *res1)
176{
177
178 assert(strcmp(res0->hr_name, res1->hr_name) == 0);
179
180 if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
181 return (true);
182 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
183 return (true);
184 if (res0->hr_role == HAST_ROLE_INIT ||
185 res0->hr_role == HAST_ROLE_SECONDARY) {
186 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
187 return (true);
188 if (res0->hr_replication != res1->hr_replication)
189 return (true);
190 if (res0->hr_timeout != res1->hr_timeout)
191 return (true);
192 if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
193 return (true);
192 }
193 return (false);
194}
195
196static bool
197resource_needs_reload(const struct hast_resource *res0,
198 const struct hast_resource *res1)
199{
200
201 assert(strcmp(res0->hr_name, res1->hr_name) == 0);
202 assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
203 assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
204
205 if (res0->hr_role != HAST_ROLE_PRIMARY)
206 return (false);
207
208 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
209 return (true);
210 if (res0->hr_replication != res1->hr_replication)
211 return (true);
212 if (res0->hr_timeout != res1->hr_timeout)
213 return (true);
194 }
195 return (false);
196}
197
198static bool
199resource_needs_reload(const struct hast_resource *res0,
200 const struct hast_resource *res1)
201{
202
203 assert(strcmp(res0->hr_name, res1->hr_name) == 0);
204 assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
205 assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
206
207 if (res0->hr_role != HAST_ROLE_PRIMARY)
208 return (false);
209
210 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
211 return (true);
212 if (res0->hr_replication != res1->hr_replication)
213 return (true);
214 if (res0->hr_timeout != res1->hr_timeout)
215 return (true);
216 if (strcmp(res0->hr_exec, res1->hr_exec) != 0)
217 return (true);
214 return (false);
215}
216
217static void
218hastd_reload(void)
219{
220 struct hastd_config *newcfg;
221 struct hast_resource *nres, *cres, *tres;
222 uint8_t role;
223
224 pjdlog_info("Reloading configuration...");
225
226 newcfg = yy_config_parse(cfgpath, false);
227 if (newcfg == NULL)
228 goto failed;
229
230 /*
231 * Check if control address has changed.
232 */
233 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
234 if (proto_server(newcfg->hc_controladdr,
235 &newcfg->hc_controlconn) < 0) {
236 pjdlog_errno(LOG_ERR,
237 "Unable to listen on control address %s",
238 newcfg->hc_controladdr);
239 goto failed;
240 }
241 }
242 /*
243 * Check if listen address has changed.
244 */
245 if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
246 if (proto_server(newcfg->hc_listenaddr,
247 &newcfg->hc_listenconn) < 0) {
248 pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
249 newcfg->hc_listenaddr);
250 goto failed;
251 }
252 }
253 /*
254 * Only when both control and listen sockets are successfully
255 * initialized switch them to new configuration.
256 */
257 if (newcfg->hc_controlconn != NULL) {
258 pjdlog_info("Control socket changed from %s to %s.",
259 cfg->hc_controladdr, newcfg->hc_controladdr);
260 proto_close(cfg->hc_controlconn);
261 cfg->hc_controlconn = newcfg->hc_controlconn;
262 newcfg->hc_controlconn = NULL;
263 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
264 sizeof(cfg->hc_controladdr));
265 }
266 if (newcfg->hc_listenconn != NULL) {
267 pjdlog_info("Listen socket changed from %s to %s.",
268 cfg->hc_listenaddr, newcfg->hc_listenaddr);
269 proto_close(cfg->hc_listenconn);
270 cfg->hc_listenconn = newcfg->hc_listenconn;
271 newcfg->hc_listenconn = NULL;
272 strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
273 sizeof(cfg->hc_listenaddr));
274 }
275
276 /*
277 * Stop and remove resources that were removed from the configuration.
278 */
279 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
280 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
281 if (strcmp(cres->hr_name, nres->hr_name) == 0)
282 break;
283 }
284 if (nres == NULL) {
285 control_set_role(cres, HAST_ROLE_INIT);
286 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
287 pjdlog_info("Resource %s removed.", cres->hr_name);
288 free(cres);
289 }
290 }
291 /*
292 * Move new resources to the current configuration.
293 */
294 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
295 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
296 if (strcmp(cres->hr_name, nres->hr_name) == 0)
297 break;
298 }
299 if (cres == NULL) {
300 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
301 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
302 pjdlog_info("Resource %s added.", nres->hr_name);
303 }
304 }
305 /*
306 * Deal with modified resources.
307 * Depending on what has changed exactly we might want to perform
308 * different actions.
309 *
310 * We do full resource restart in the following situations:
311 * Resource role is INIT or SECONDARY.
312 * Resource role is PRIMARY and path to local component or provider
313 * name has changed.
314 * In case of PRIMARY, the worker process will be killed and restarted,
315 * which also means removing /dev/hast/<name> provider and
316 * recreating it.
317 *
318 * We do just reload (send SIGHUP to worker process) if we act as
319 * PRIMARY, but only remote address, replication mode and timeout
320 * has changed. For those, there is no need to restart worker process.
321 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
322 * replication mode has changed or simply set new timeout if only
323 * timeout has changed.
324 */
325 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
326 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
327 if (strcmp(cres->hr_name, nres->hr_name) == 0)
328 break;
329 }
330 assert(cres != NULL);
331 if (resource_needs_restart(cres, nres)) {
332 pjdlog_info("Resource %s configuration was modified, restarting it.",
333 cres->hr_name);
334 role = cres->hr_role;
335 control_set_role(cres, HAST_ROLE_INIT);
336 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
337 free(cres);
338 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
339 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
340 control_set_role(nres, role);
341 } else if (resource_needs_reload(cres, nres)) {
342 pjdlog_info("Resource %s configuration was modified, reloading it.",
343 cres->hr_name);
344 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
345 sizeof(cres->hr_remoteaddr));
346 cres->hr_replication = nres->hr_replication;
347 cres->hr_timeout = nres->hr_timeout;
348 if (cres->hr_workerpid != 0) {
349 if (kill(cres->hr_workerpid, SIGHUP) < 0) {
350 pjdlog_errno(LOG_WARNING,
351 "Unable to send SIGHUP to worker process %u",
352 (unsigned int)cres->hr_workerpid);
353 }
354 }
355 }
356 }
357
358 yy_config_free(newcfg);
359 pjdlog_info("Configuration reloaded successfully.");
360 return;
361failed:
362 if (newcfg != NULL) {
363 if (newcfg->hc_controlconn != NULL)
364 proto_close(newcfg->hc_controlconn);
365 if (newcfg->hc_listenconn != NULL)
366 proto_close(newcfg->hc_listenconn);
367 yy_config_free(newcfg);
368 }
369 pjdlog_warning("Configuration not reloaded.");
370}
371
372static void
373listen_accept(void)
374{
375 struct hast_resource *res;
376 struct proto_conn *conn;
377 struct nv *nvin, *nvout, *nverr;
378 const char *resname;
379 const unsigned char *token;
380 char laddr[256], raddr[256];
381 size_t size;
382 pid_t pid;
383 int status;
384
385 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
386 pjdlog_debug(1, "Accepting connection to %s.", laddr);
387
388 if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
389 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
390 return;
391 }
392
393 proto_local_address(conn, laddr, sizeof(laddr));
394 proto_remote_address(conn, raddr, sizeof(raddr));
395 pjdlog_info("Connection from %s to %s.", raddr, laddr);
396
397 /* Error in setting timeout is not critical, but why should it fail? */
398 if (proto_timeout(conn, HAST_TIMEOUT) < 0)
399 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
400
401 nvin = nvout = nverr = NULL;
402
403 /*
404 * Before receiving any data see if remote host have access to any
405 * resource.
406 */
407 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
408 if (proto_address_match(conn, res->hr_remoteaddr))
409 break;
410 }
411 if (res == NULL) {
412 pjdlog_error("Client %s isn't known.", raddr);
413 goto close;
414 }
415 /* Ok, remote host can access at least one resource. */
416
417 if (hast_proto_recv_hdr(conn, &nvin) < 0) {
418 pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
419 raddr);
420 goto close;
421 }
422
423 resname = nv_get_string(nvin, "resource");
424 if (resname == NULL) {
425 pjdlog_error("No 'resource' field in the header received from %s.",
426 raddr);
427 goto close;
428 }
429 pjdlog_debug(2, "%s: resource=%s", raddr, resname);
430 token = nv_get_uint8_array(nvin, &size, "token");
431 /*
432 * NULL token means that this is first conection.
433 */
434 if (token != NULL && size != sizeof(res->hr_token)) {
435 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
436 raddr, sizeof(res->hr_token), size);
437 goto close;
438 }
439
440 /*
441 * From now on we want to send errors to the remote node.
442 */
443 nverr = nv_alloc();
444
445 /* Find resource related to this connection. */
446 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
447 if (strcmp(resname, res->hr_name) == 0)
448 break;
449 }
450 /* Have we found the resource? */
451 if (res == NULL) {
452 pjdlog_error("No resource '%s' as requested by %s.",
453 resname, raddr);
454 nv_add_stringf(nverr, "errmsg", "Resource not configured.");
455 goto fail;
456 }
457
458 /* Now that we know resource name setup log prefix. */
459 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
460
461 /* Does the remote host have access to this resource? */
462 if (!proto_address_match(conn, res->hr_remoteaddr)) {
463 pjdlog_error("Client %s has no access to the resource.", raddr);
464 nv_add_stringf(nverr, "errmsg", "No access to the resource.");
465 goto fail;
466 }
467 /* Is the resource marked as secondary? */
468 if (res->hr_role != HAST_ROLE_SECONDARY) {
469 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
470 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
471 raddr);
472 nv_add_stringf(nverr, "errmsg",
473 "Remote node acts as %s for the resource and not as %s.",
474 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
475 goto fail;
476 }
477 /* Does token (if exists) match? */
478 if (token != NULL && memcmp(token, res->hr_token,
479 sizeof(res->hr_token)) != 0) {
480 pjdlog_error("Token received from %s doesn't match.", raddr);
481 nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
482 goto fail;
483 }
484 /*
485 * If there is no token, but we have half-open connection
486 * (only remotein) or full connection (worker process is running)
487 * we have to cancel those and accept the new connection.
488 */
489 if (token == NULL) {
490 assert(res->hr_remoteout == NULL);
491 pjdlog_debug(1, "Initial connection from %s.", raddr);
492 if (res->hr_workerpid != 0) {
493 assert(res->hr_remotein == NULL);
494 pjdlog_debug(1,
495 "Worker process exists (pid=%u), stopping it.",
496 (unsigned int)res->hr_workerpid);
497 /* Stop child process. */
498 if (kill(res->hr_workerpid, SIGINT) < 0) {
499 pjdlog_errno(LOG_ERR,
500 "Unable to stop worker process (pid=%u)",
501 (unsigned int)res->hr_workerpid);
502 /*
503 * Other than logging the problem we
504 * ignore it - nothing smart to do.
505 */
506 }
507 /* Wait for it to exit. */
508 else if ((pid = waitpid(res->hr_workerpid,
509 &status, 0)) != res->hr_workerpid) {
510 /* We can only log the problem. */
511 pjdlog_errno(LOG_ERR,
512 "Waiting for worker process (pid=%u) failed",
513 (unsigned int)res->hr_workerpid);
514 } else {
515 child_exit_log(res->hr_workerpid, status);
516 }
517 res->hr_workerpid = 0;
518 } else if (res->hr_remotein != NULL) {
519 char oaddr[256];
520
521 proto_remote_address(conn, oaddr, sizeof(oaddr));
522 pjdlog_debug(1,
523 "Canceling half-open connection from %s on connection from %s.",
524 oaddr, raddr);
525 proto_close(res->hr_remotein);
526 res->hr_remotein = NULL;
527 }
528 }
529
530 /*
531 * Checks and cleanups are done.
532 */
533
534 if (token == NULL) {
535 arc4random_buf(res->hr_token, sizeof(res->hr_token));
536 nvout = nv_alloc();
537 nv_add_uint8_array(nvout, res->hr_token,
538 sizeof(res->hr_token), "token");
539 if (nv_error(nvout) != 0) {
540 pjdlog_common(LOG_ERR, 0, nv_error(nvout),
541 "Unable to prepare return header for %s", raddr);
542 nv_add_stringf(nverr, "errmsg",
543 "Remote node was unable to prepare return header: %s.",
544 strerror(nv_error(nvout)));
545 goto fail;
546 }
547 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
548 int error = errno;
549
550 pjdlog_errno(LOG_ERR, "Unable to send response to %s",
551 raddr);
552 nv_add_stringf(nverr, "errmsg",
553 "Remote node was unable to send response: %s.",
554 strerror(error));
555 goto fail;
556 }
557 res->hr_remotein = conn;
558 pjdlog_debug(1, "Incoming connection from %s configured.",
559 raddr);
560 } else {
561 res->hr_remoteout = conn;
562 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
563 hastd_secondary(res, nvin);
564 }
565 nv_free(nvin);
566 nv_free(nvout);
567 nv_free(nverr);
568 pjdlog_prefix_set("%s", "");
569 return;
570fail:
571 if (nv_error(nverr) != 0) {
572 pjdlog_common(LOG_ERR, 0, nv_error(nverr),
573 "Unable to prepare error header for %s", raddr);
574 goto close;
575 }
576 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
577 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
578 goto close;
579 }
580close:
581 if (nvin != NULL)
582 nv_free(nvin);
583 if (nvout != NULL)
584 nv_free(nvout);
585 if (nverr != NULL)
586 nv_free(nverr);
587 proto_close(conn);
588 pjdlog_prefix_set("%s", "");
589}
590
591static void
592main_loop(void)
593{
594 fd_set rfds, wfds;
595 int cfd, lfd, maxfd, ret;
596
597 for (;;) {
598 if (sigchld_received) {
599 sigchld_received = false;
600 child_exit();
601 }
602 if (sighup_received) {
603 sighup_received = false;
604 hastd_reload();
605 }
606
607 cfd = proto_descriptor(cfg->hc_controlconn);
608 lfd = proto_descriptor(cfg->hc_listenconn);
609 maxfd = cfd > lfd ? cfd : lfd;
610
611 /* Setup descriptors for select(2). */
612 FD_ZERO(&rfds);
613 FD_SET(cfd, &rfds);
614 FD_SET(lfd, &rfds);
615 FD_ZERO(&wfds);
616 FD_SET(cfd, &wfds);
617 FD_SET(lfd, &wfds);
618
619 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
620 if (ret == -1) {
621 if (errno == EINTR)
622 continue;
623 KEEP_ERRNO((void)pidfile_remove(pfh));
624 pjdlog_exit(EX_OSERR, "select() failed");
625 }
626
627 if (FD_ISSET(cfd, &rfds) || FD_ISSET(cfd, &wfds))
628 control_handle(cfg);
629 if (FD_ISSET(lfd, &rfds) || FD_ISSET(lfd, &wfds))
630 listen_accept();
631 }
632}
633
634int
635main(int argc, char *argv[])
636{
637 const char *pidfile;
638 pid_t otherpid;
639 bool foreground;
640 int debuglevel;
641
642 g_gate_load();
643
644 foreground = false;
645 debuglevel = 0;
646 pidfile = HASTD_PIDFILE;
647
648 for (;;) {
649 int ch;
650
651 ch = getopt(argc, argv, "c:dFhP:");
652 if (ch == -1)
653 break;
654 switch (ch) {
655 case 'c':
656 cfgpath = optarg;
657 break;
658 case 'd':
659 debuglevel++;
660 break;
661 case 'F':
662 foreground = true;
663 break;
664 case 'P':
665 pidfile = optarg;
666 break;
667 case 'h':
668 default:
669 usage();
670 }
671 }
672 argc -= optind;
673 argv += optind;
674
675 pjdlog_debug_set(debuglevel);
676
677 pfh = pidfile_open(pidfile, 0600, &otherpid);
678 if (pfh == NULL) {
679 if (errno == EEXIST) {
680 pjdlog_exitx(EX_TEMPFAIL,
681 "Another hastd is already running, pid: %jd.",
682 (intmax_t)otherpid);
683 }
684 /* If we cannot create pidfile from other reasons, only warn. */
685 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
686 }
687
688 cfg = yy_config_parse(cfgpath, true);
689 assert(cfg != NULL);
690
691 signal(SIGHUP, sighandler);
692 signal(SIGCHLD, sighandler);
693
694 /* Listen on control address. */
695 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
696 KEEP_ERRNO((void)pidfile_remove(pfh));
697 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
698 cfg->hc_controladdr);
699 }
700 /* Listen for remote connections. */
701 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
702 KEEP_ERRNO((void)pidfile_remove(pfh));
703 pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
704 cfg->hc_listenaddr);
705 }
706
707 if (!foreground) {
708 if (daemon(0, 0) < 0) {
709 KEEP_ERRNO((void)pidfile_remove(pfh));
710 pjdlog_exit(EX_OSERR, "Unable to daemonize");
711 }
712
713 /* Start logging to syslog. */
714 pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
715
716 /* Write PID to a file. */
717 if (pidfile_write(pfh) < 0) {
718 pjdlog_errno(LOG_WARNING,
719 "Unable to write PID to a file");
720 }
721 }
722
723 main_loop();
724
725 exit(0);
726}
218 return (false);
219}
220
221static void
222hastd_reload(void)
223{
224 struct hastd_config *newcfg;
225 struct hast_resource *nres, *cres, *tres;
226 uint8_t role;
227
228 pjdlog_info("Reloading configuration...");
229
230 newcfg = yy_config_parse(cfgpath, false);
231 if (newcfg == NULL)
232 goto failed;
233
234 /*
235 * Check if control address has changed.
236 */
237 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
238 if (proto_server(newcfg->hc_controladdr,
239 &newcfg->hc_controlconn) < 0) {
240 pjdlog_errno(LOG_ERR,
241 "Unable to listen on control address %s",
242 newcfg->hc_controladdr);
243 goto failed;
244 }
245 }
246 /*
247 * Check if listen address has changed.
248 */
249 if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
250 if (proto_server(newcfg->hc_listenaddr,
251 &newcfg->hc_listenconn) < 0) {
252 pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
253 newcfg->hc_listenaddr);
254 goto failed;
255 }
256 }
257 /*
258 * Only when both control and listen sockets are successfully
259 * initialized switch them to new configuration.
260 */
261 if (newcfg->hc_controlconn != NULL) {
262 pjdlog_info("Control socket changed from %s to %s.",
263 cfg->hc_controladdr, newcfg->hc_controladdr);
264 proto_close(cfg->hc_controlconn);
265 cfg->hc_controlconn = newcfg->hc_controlconn;
266 newcfg->hc_controlconn = NULL;
267 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
268 sizeof(cfg->hc_controladdr));
269 }
270 if (newcfg->hc_listenconn != NULL) {
271 pjdlog_info("Listen socket changed from %s to %s.",
272 cfg->hc_listenaddr, newcfg->hc_listenaddr);
273 proto_close(cfg->hc_listenconn);
274 cfg->hc_listenconn = newcfg->hc_listenconn;
275 newcfg->hc_listenconn = NULL;
276 strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
277 sizeof(cfg->hc_listenaddr));
278 }
279
280 /*
281 * Stop and remove resources that were removed from the configuration.
282 */
283 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
284 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
285 if (strcmp(cres->hr_name, nres->hr_name) == 0)
286 break;
287 }
288 if (nres == NULL) {
289 control_set_role(cres, HAST_ROLE_INIT);
290 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
291 pjdlog_info("Resource %s removed.", cres->hr_name);
292 free(cres);
293 }
294 }
295 /*
296 * Move new resources to the current configuration.
297 */
298 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
299 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
300 if (strcmp(cres->hr_name, nres->hr_name) == 0)
301 break;
302 }
303 if (cres == NULL) {
304 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
305 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
306 pjdlog_info("Resource %s added.", nres->hr_name);
307 }
308 }
309 /*
310 * Deal with modified resources.
311 * Depending on what has changed exactly we might want to perform
312 * different actions.
313 *
314 * We do full resource restart in the following situations:
315 * Resource role is INIT or SECONDARY.
316 * Resource role is PRIMARY and path to local component or provider
317 * name has changed.
318 * In case of PRIMARY, the worker process will be killed and restarted,
319 * which also means removing /dev/hast/<name> provider and
320 * recreating it.
321 *
322 * We do just reload (send SIGHUP to worker process) if we act as
323 * PRIMARY, but only remote address, replication mode and timeout
324 * has changed. For those, there is no need to restart worker process.
325 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
326 * replication mode has changed or simply set new timeout if only
327 * timeout has changed.
328 */
329 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
330 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
331 if (strcmp(cres->hr_name, nres->hr_name) == 0)
332 break;
333 }
334 assert(cres != NULL);
335 if (resource_needs_restart(cres, nres)) {
336 pjdlog_info("Resource %s configuration was modified, restarting it.",
337 cres->hr_name);
338 role = cres->hr_role;
339 control_set_role(cres, HAST_ROLE_INIT);
340 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
341 free(cres);
342 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
343 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
344 control_set_role(nres, role);
345 } else if (resource_needs_reload(cres, nres)) {
346 pjdlog_info("Resource %s configuration was modified, reloading it.",
347 cres->hr_name);
348 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
349 sizeof(cres->hr_remoteaddr));
350 cres->hr_replication = nres->hr_replication;
351 cres->hr_timeout = nres->hr_timeout;
352 if (cres->hr_workerpid != 0) {
353 if (kill(cres->hr_workerpid, SIGHUP) < 0) {
354 pjdlog_errno(LOG_WARNING,
355 "Unable to send SIGHUP to worker process %u",
356 (unsigned int)cres->hr_workerpid);
357 }
358 }
359 }
360 }
361
362 yy_config_free(newcfg);
363 pjdlog_info("Configuration reloaded successfully.");
364 return;
365failed:
366 if (newcfg != NULL) {
367 if (newcfg->hc_controlconn != NULL)
368 proto_close(newcfg->hc_controlconn);
369 if (newcfg->hc_listenconn != NULL)
370 proto_close(newcfg->hc_listenconn);
371 yy_config_free(newcfg);
372 }
373 pjdlog_warning("Configuration not reloaded.");
374}
375
376static void
377listen_accept(void)
378{
379 struct hast_resource *res;
380 struct proto_conn *conn;
381 struct nv *nvin, *nvout, *nverr;
382 const char *resname;
383 const unsigned char *token;
384 char laddr[256], raddr[256];
385 size_t size;
386 pid_t pid;
387 int status;
388
389 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
390 pjdlog_debug(1, "Accepting connection to %s.", laddr);
391
392 if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
393 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
394 return;
395 }
396
397 proto_local_address(conn, laddr, sizeof(laddr));
398 proto_remote_address(conn, raddr, sizeof(raddr));
399 pjdlog_info("Connection from %s to %s.", raddr, laddr);
400
401 /* Error in setting timeout is not critical, but why should it fail? */
402 if (proto_timeout(conn, HAST_TIMEOUT) < 0)
403 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
404
405 nvin = nvout = nverr = NULL;
406
407 /*
408 * Before receiving any data see if remote host have access to any
409 * resource.
410 */
411 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
412 if (proto_address_match(conn, res->hr_remoteaddr))
413 break;
414 }
415 if (res == NULL) {
416 pjdlog_error("Client %s isn't known.", raddr);
417 goto close;
418 }
419 /* Ok, remote host can access at least one resource. */
420
421 if (hast_proto_recv_hdr(conn, &nvin) < 0) {
422 pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
423 raddr);
424 goto close;
425 }
426
427 resname = nv_get_string(nvin, "resource");
428 if (resname == NULL) {
429 pjdlog_error("No 'resource' field in the header received from %s.",
430 raddr);
431 goto close;
432 }
433 pjdlog_debug(2, "%s: resource=%s", raddr, resname);
434 token = nv_get_uint8_array(nvin, &size, "token");
435 /*
436 * NULL token means that this is first conection.
437 */
438 if (token != NULL && size != sizeof(res->hr_token)) {
439 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
440 raddr, sizeof(res->hr_token), size);
441 goto close;
442 }
443
444 /*
445 * From now on we want to send errors to the remote node.
446 */
447 nverr = nv_alloc();
448
449 /* Find resource related to this connection. */
450 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
451 if (strcmp(resname, res->hr_name) == 0)
452 break;
453 }
454 /* Have we found the resource? */
455 if (res == NULL) {
456 pjdlog_error("No resource '%s' as requested by %s.",
457 resname, raddr);
458 nv_add_stringf(nverr, "errmsg", "Resource not configured.");
459 goto fail;
460 }
461
462 /* Now that we know resource name setup log prefix. */
463 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
464
465 /* Does the remote host have access to this resource? */
466 if (!proto_address_match(conn, res->hr_remoteaddr)) {
467 pjdlog_error("Client %s has no access to the resource.", raddr);
468 nv_add_stringf(nverr, "errmsg", "No access to the resource.");
469 goto fail;
470 }
471 /* Is the resource marked as secondary? */
472 if (res->hr_role != HAST_ROLE_SECONDARY) {
473 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
474 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
475 raddr);
476 nv_add_stringf(nverr, "errmsg",
477 "Remote node acts as %s for the resource and not as %s.",
478 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
479 goto fail;
480 }
481 /* Does token (if exists) match? */
482 if (token != NULL && memcmp(token, res->hr_token,
483 sizeof(res->hr_token)) != 0) {
484 pjdlog_error("Token received from %s doesn't match.", raddr);
485 nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
486 goto fail;
487 }
488 /*
489 * If there is no token, but we have half-open connection
490 * (only remotein) or full connection (worker process is running)
491 * we have to cancel those and accept the new connection.
492 */
493 if (token == NULL) {
494 assert(res->hr_remoteout == NULL);
495 pjdlog_debug(1, "Initial connection from %s.", raddr);
496 if (res->hr_workerpid != 0) {
497 assert(res->hr_remotein == NULL);
498 pjdlog_debug(1,
499 "Worker process exists (pid=%u), stopping it.",
500 (unsigned int)res->hr_workerpid);
501 /* Stop child process. */
502 if (kill(res->hr_workerpid, SIGINT) < 0) {
503 pjdlog_errno(LOG_ERR,
504 "Unable to stop worker process (pid=%u)",
505 (unsigned int)res->hr_workerpid);
506 /*
507 * Other than logging the problem we
508 * ignore it - nothing smart to do.
509 */
510 }
511 /* Wait for it to exit. */
512 else if ((pid = waitpid(res->hr_workerpid,
513 &status, 0)) != res->hr_workerpid) {
514 /* We can only log the problem. */
515 pjdlog_errno(LOG_ERR,
516 "Waiting for worker process (pid=%u) failed",
517 (unsigned int)res->hr_workerpid);
518 } else {
519 child_exit_log(res->hr_workerpid, status);
520 }
521 res->hr_workerpid = 0;
522 } else if (res->hr_remotein != NULL) {
523 char oaddr[256];
524
525 proto_remote_address(conn, oaddr, sizeof(oaddr));
526 pjdlog_debug(1,
527 "Canceling half-open connection from %s on connection from %s.",
528 oaddr, raddr);
529 proto_close(res->hr_remotein);
530 res->hr_remotein = NULL;
531 }
532 }
533
534 /*
535 * Checks and cleanups are done.
536 */
537
538 if (token == NULL) {
539 arc4random_buf(res->hr_token, sizeof(res->hr_token));
540 nvout = nv_alloc();
541 nv_add_uint8_array(nvout, res->hr_token,
542 sizeof(res->hr_token), "token");
543 if (nv_error(nvout) != 0) {
544 pjdlog_common(LOG_ERR, 0, nv_error(nvout),
545 "Unable to prepare return header for %s", raddr);
546 nv_add_stringf(nverr, "errmsg",
547 "Remote node was unable to prepare return header: %s.",
548 strerror(nv_error(nvout)));
549 goto fail;
550 }
551 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
552 int error = errno;
553
554 pjdlog_errno(LOG_ERR, "Unable to send response to %s",
555 raddr);
556 nv_add_stringf(nverr, "errmsg",
557 "Remote node was unable to send response: %s.",
558 strerror(error));
559 goto fail;
560 }
561 res->hr_remotein = conn;
562 pjdlog_debug(1, "Incoming connection from %s configured.",
563 raddr);
564 } else {
565 res->hr_remoteout = conn;
566 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
567 hastd_secondary(res, nvin);
568 }
569 nv_free(nvin);
570 nv_free(nvout);
571 nv_free(nverr);
572 pjdlog_prefix_set("%s", "");
573 return;
574fail:
575 if (nv_error(nverr) != 0) {
576 pjdlog_common(LOG_ERR, 0, nv_error(nverr),
577 "Unable to prepare error header for %s", raddr);
578 goto close;
579 }
580 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
581 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
582 goto close;
583 }
584close:
585 if (nvin != NULL)
586 nv_free(nvin);
587 if (nvout != NULL)
588 nv_free(nvout);
589 if (nverr != NULL)
590 nv_free(nverr);
591 proto_close(conn);
592 pjdlog_prefix_set("%s", "");
593}
594
595static void
596main_loop(void)
597{
598 fd_set rfds, wfds;
599 int cfd, lfd, maxfd, ret;
600
601 for (;;) {
602 if (sigchld_received) {
603 sigchld_received = false;
604 child_exit();
605 }
606 if (sighup_received) {
607 sighup_received = false;
608 hastd_reload();
609 }
610
611 cfd = proto_descriptor(cfg->hc_controlconn);
612 lfd = proto_descriptor(cfg->hc_listenconn);
613 maxfd = cfd > lfd ? cfd : lfd;
614
615 /* Setup descriptors for select(2). */
616 FD_ZERO(&rfds);
617 FD_SET(cfd, &rfds);
618 FD_SET(lfd, &rfds);
619 FD_ZERO(&wfds);
620 FD_SET(cfd, &wfds);
621 FD_SET(lfd, &wfds);
622
623 ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
624 if (ret == -1) {
625 if (errno == EINTR)
626 continue;
627 KEEP_ERRNO((void)pidfile_remove(pfh));
628 pjdlog_exit(EX_OSERR, "select() failed");
629 }
630
631 if (FD_ISSET(cfd, &rfds) || FD_ISSET(cfd, &wfds))
632 control_handle(cfg);
633 if (FD_ISSET(lfd, &rfds) || FD_ISSET(lfd, &wfds))
634 listen_accept();
635 }
636}
637
638int
639main(int argc, char *argv[])
640{
641 const char *pidfile;
642 pid_t otherpid;
643 bool foreground;
644 int debuglevel;
645
646 g_gate_load();
647
648 foreground = false;
649 debuglevel = 0;
650 pidfile = HASTD_PIDFILE;
651
652 for (;;) {
653 int ch;
654
655 ch = getopt(argc, argv, "c:dFhP:");
656 if (ch == -1)
657 break;
658 switch (ch) {
659 case 'c':
660 cfgpath = optarg;
661 break;
662 case 'd':
663 debuglevel++;
664 break;
665 case 'F':
666 foreground = true;
667 break;
668 case 'P':
669 pidfile = optarg;
670 break;
671 case 'h':
672 default:
673 usage();
674 }
675 }
676 argc -= optind;
677 argv += optind;
678
679 pjdlog_debug_set(debuglevel);
680
681 pfh = pidfile_open(pidfile, 0600, &otherpid);
682 if (pfh == NULL) {
683 if (errno == EEXIST) {
684 pjdlog_exitx(EX_TEMPFAIL,
685 "Another hastd is already running, pid: %jd.",
686 (intmax_t)otherpid);
687 }
688 /* If we cannot create pidfile from other reasons, only warn. */
689 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile");
690 }
691
692 cfg = yy_config_parse(cfgpath, true);
693 assert(cfg != NULL);
694
695 signal(SIGHUP, sighandler);
696 signal(SIGCHLD, sighandler);
697
698 /* Listen on control address. */
699 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
700 KEEP_ERRNO((void)pidfile_remove(pfh));
701 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
702 cfg->hc_controladdr);
703 }
704 /* Listen for remote connections. */
705 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
706 KEEP_ERRNO((void)pidfile_remove(pfh));
707 pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
708 cfg->hc_listenaddr);
709 }
710
711 if (!foreground) {
712 if (daemon(0, 0) < 0) {
713 KEEP_ERRNO((void)pidfile_remove(pfh));
714 pjdlog_exit(EX_OSERR, "Unable to daemonize");
715 }
716
717 /* Start logging to syslog. */
718 pjdlog_mode_set(PJDLOG_MODE_SYSLOG);
719
720 /* Write PID to a file. */
721 if (pidfile_write(pfh) < 0) {
722 pjdlog_errno(LOG_WARNING,
723 "Unable to write PID to a file");
724 }
725 }
726
727 main_loop();
728
729 exit(0);
730}