hastd.c revision 212038
1/*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by Pawel Jakub Dawidek under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 212038 2010-08-30 23:26:10Z pjd $"); 33 34#include <sys/param.h> 35#include <sys/linker.h> 36#include <sys/module.h> 37#include <sys/wait.h> 38 39#include <assert.h> 40#include <err.h> 41#include <errno.h> 42#include <libutil.h> 43#include <signal.h> 44#include <stdbool.h> 45#include <stdio.h> 46#include <stdlib.h> 47#include <string.h> 48#include <sysexits.h> 49#include <unistd.h> 50 51#include <activemap.h> 52#include <pjdlog.h> 53 54#include "control.h" 55#include "event.h" 56#include "hast.h" 57#include "hast_proto.h" 58#include "hastd.h" 59#include "hooks.h" 60#include "subr.h" 61 62/* Path to configuration file. */ 63const char *cfgpath = HAST_CONFIG; 64/* Hastd configuration. */ 65static struct hastd_config *cfg; 66/* Was SIGCHLD signal received? */ 67bool sigchld_received = false; 68/* Was SIGHUP signal received? */ 69bool sighup_received = false; 70/* Was SIGINT or SIGTERM signal received? */ 71bool sigexit_received = false; 72/* PID file handle. */ 73struct pidfh *pfh; 74 75/* How often check for hooks running for too long. */ 76#define REPORT_INTERVAL 10 77 78static void 79usage(void) 80{ 81 82 errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 83} 84 85static void 86sighandler(int sig) 87{ 88 89 switch (sig) { 90 case SIGINT: 91 case SIGTERM: 92 sigexit_received = true; 93 break; 94 case SIGCHLD: 95 sigchld_received = true; 96 break; 97 case SIGHUP: 98 sighup_received = true; 99 break; 100 default: 101 assert(!"invalid condition"); 102 } 103} 104 105static void 106g_gate_load(void) 107{ 108 109 if (modfind("g_gate") == -1) { 110 /* Not present in kernel, try loading it. */ 111 if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 112 if (errno != EEXIST) { 113 pjdlog_exit(EX_OSERR, 114 "Unable to load geom_gate module"); 115 } 116 } 117 } 118} 119 120static void 121child_exit_log(unsigned int pid, int status) 122{ 123 124 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 125 pjdlog_debug(1, "Worker process exited gracefully (pid=%u).", 126 pid); 127 } else if (WIFSIGNALED(status)) { 128 pjdlog_error("Worker process killed (pid=%u, signal=%d).", 129 pid, WTERMSIG(status)); 130 } else { 131 pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).", 132 pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1); 133 } 134} 135 136static void 137child_exit(void) 138{ 139 struct hast_resource *res; 140 int status; 141 pid_t pid; 142 143 while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 144 /* Find resource related to the process that just exited. */ 145 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 146 if (pid == res->hr_workerpid) 147 break; 148 } 149 if (res == NULL) { 150 /* 151 * This can happen when new connection arrives and we 152 * cancel child responsible for the old one or if this 153 * was hook which we executed. 154 */ 155 hook_check_one(pid, status); 156 continue; 157 } 158 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 159 role2str(res->hr_role)); 160 child_exit_log(pid, status); 161 proto_close(res->hr_ctrl); 162 res->hr_ctrl = NULL; 163 if (res->hr_event != NULL) { 164 proto_close(res->hr_event); 165 res->hr_event = NULL; 166 } 167 res->hr_workerpid = 0; 168 if (res->hr_role == HAST_ROLE_PRIMARY) { 169 /* 170 * Restart child process if it was killed by signal 171 * or exited because of temporary problem. 172 */ 173 if (WIFSIGNALED(status) || 174 (WIFEXITED(status) && 175 WEXITSTATUS(status) == EX_TEMPFAIL)) { 176 sleep(1); 177 pjdlog_info("Restarting worker process."); 178 hastd_primary(res); 179 } else { 180 res->hr_role = HAST_ROLE_INIT; 181 pjdlog_info("Changing resource role back to %s.", 182 role2str(res->hr_role)); 183 } 184 } 185 pjdlog_prefix_set("%s", ""); 186 } 187} 188 189static bool 190resource_needs_restart(const struct hast_resource *res0, 191 const struct hast_resource *res1) 192{ 193 194 assert(strcmp(res0->hr_name, res1->hr_name) == 0); 195 196 if (strcmp(res0->hr_provname, res1->hr_provname) != 0) 197 return (true); 198 if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) 199 return (true); 200 if (res0->hr_role == HAST_ROLE_INIT || 201 res0->hr_role == HAST_ROLE_SECONDARY) { 202 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 203 return (true); 204 if (res0->hr_replication != res1->hr_replication) 205 return (true); 206 if (res0->hr_timeout != res1->hr_timeout) 207 return (true); 208 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 209 return (true); 210 } 211 return (false); 212} 213 214static bool 215resource_needs_reload(const struct hast_resource *res0, 216 const struct hast_resource *res1) 217{ 218 219 assert(strcmp(res0->hr_name, res1->hr_name) == 0); 220 assert(strcmp(res0->hr_provname, res1->hr_provname) == 0); 221 assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); 222 223 if (res0->hr_role != HAST_ROLE_PRIMARY) 224 return (false); 225 226 if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 227 return (true); 228 if (res0->hr_replication != res1->hr_replication) 229 return (true); 230 if (res0->hr_timeout != res1->hr_timeout) 231 return (true); 232 if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 233 return (true); 234 return (false); 235} 236 237static void 238hastd_reload(void) 239{ 240 struct hastd_config *newcfg; 241 struct hast_resource *nres, *cres, *tres; 242 uint8_t role; 243 244 pjdlog_info("Reloading configuration..."); 245 246 newcfg = yy_config_parse(cfgpath, false); 247 if (newcfg == NULL) 248 goto failed; 249 250 /* 251 * Check if control address has changed. 252 */ 253 if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { 254 if (proto_server(newcfg->hc_controladdr, 255 &newcfg->hc_controlconn) < 0) { 256 pjdlog_errno(LOG_ERR, 257 "Unable to listen on control address %s", 258 newcfg->hc_controladdr); 259 goto failed; 260 } 261 } 262 /* 263 * Check if listen address has changed. 264 */ 265 if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) { 266 if (proto_server(newcfg->hc_listenaddr, 267 &newcfg->hc_listenconn) < 0) { 268 pjdlog_errno(LOG_ERR, "Unable to listen on address %s", 269 newcfg->hc_listenaddr); 270 goto failed; 271 } 272 } 273 /* 274 * Only when both control and listen sockets are successfully 275 * initialized switch them to new configuration. 276 */ 277 if (newcfg->hc_controlconn != NULL) { 278 pjdlog_info("Control socket changed from %s to %s.", 279 cfg->hc_controladdr, newcfg->hc_controladdr); 280 proto_close(cfg->hc_controlconn); 281 cfg->hc_controlconn = newcfg->hc_controlconn; 282 newcfg->hc_controlconn = NULL; 283 strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, 284 sizeof(cfg->hc_controladdr)); 285 } 286 if (newcfg->hc_listenconn != NULL) { 287 pjdlog_info("Listen socket changed from %s to %s.", 288 cfg->hc_listenaddr, newcfg->hc_listenaddr); 289 proto_close(cfg->hc_listenconn); 290 cfg->hc_listenconn = newcfg->hc_listenconn; 291 newcfg->hc_listenconn = NULL; 292 strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr, 293 sizeof(cfg->hc_listenaddr)); 294 } 295 296 /* 297 * Stop and remove resources that were removed from the configuration. 298 */ 299 TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { 300 TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { 301 if (strcmp(cres->hr_name, nres->hr_name) == 0) 302 break; 303 } 304 if (nres == NULL) { 305 control_set_role(cres, HAST_ROLE_INIT); 306 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 307 pjdlog_info("Resource %s removed.", cres->hr_name); 308 free(cres); 309 } 310 } 311 /* 312 * Move new resources to the current configuration. 313 */ 314 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 315 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 316 if (strcmp(cres->hr_name, nres->hr_name) == 0) 317 break; 318 } 319 if (cres == NULL) { 320 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 321 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 322 pjdlog_info("Resource %s added.", nres->hr_name); 323 } 324 } 325 /* 326 * Deal with modified resources. 327 * Depending on what has changed exactly we might want to perform 328 * different actions. 329 * 330 * We do full resource restart in the following situations: 331 * Resource role is INIT or SECONDARY. 332 * Resource role is PRIMARY and path to local component or provider 333 * name has changed. 334 * In case of PRIMARY, the worker process will be killed and restarted, 335 * which also means removing /dev/hast/<name> provider and 336 * recreating it. 337 * 338 * We do just reload (send SIGHUP to worker process) if we act as 339 * PRIMARY, but only remote address, replication mode and timeout 340 * has changed. For those, there is no need to restart worker process. 341 * If PRIMARY receives SIGHUP, it will reconnect if remote address or 342 * replication mode has changed or simply set new timeout if only 343 * timeout has changed. 344 */ 345 TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 346 TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 347 if (strcmp(cres->hr_name, nres->hr_name) == 0) 348 break; 349 } 350 assert(cres != NULL); 351 if (resource_needs_restart(cres, nres)) { 352 pjdlog_info("Resource %s configuration was modified, restarting it.", 353 cres->hr_name); 354 role = cres->hr_role; 355 control_set_role(cres, HAST_ROLE_INIT); 356 TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 357 free(cres); 358 TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 359 TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 360 control_set_role(nres, role); 361 } else if (resource_needs_reload(cres, nres)) { 362 pjdlog_info("Resource %s configuration was modified, reloading it.", 363 cres->hr_name); 364 strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, 365 sizeof(cres->hr_remoteaddr)); 366 cres->hr_replication = nres->hr_replication; 367 cres->hr_timeout = nres->hr_timeout; 368 if (cres->hr_workerpid != 0) { 369 if (kill(cres->hr_workerpid, SIGHUP) < 0) { 370 pjdlog_errno(LOG_WARNING, 371 "Unable to send SIGHUP to worker process %u", 372 (unsigned int)cres->hr_workerpid); 373 } 374 } 375 } 376 } 377 378 yy_config_free(newcfg); 379 pjdlog_info("Configuration reloaded successfully."); 380 return; 381failed: 382 if (newcfg != NULL) { 383 if (newcfg->hc_controlconn != NULL) 384 proto_close(newcfg->hc_controlconn); 385 if (newcfg->hc_listenconn != NULL) 386 proto_close(newcfg->hc_listenconn); 387 yy_config_free(newcfg); 388 } 389 pjdlog_warning("Configuration not reloaded."); 390} 391 392static void 393terminate_workers(void) 394{ 395 struct hast_resource *res; 396 397 pjdlog_info("Termination signal received, exiting."); 398 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 399 if (res->hr_workerpid == 0) 400 continue; 401 pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).", 402 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 403 if (kill(res->hr_workerpid, SIGTERM) == 0) 404 continue; 405 pjdlog_errno(LOG_WARNING, 406 "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).", 407 res->hr_name, role2str(res->hr_role), res->hr_workerpid); 408 } 409} 410 411static void 412listen_accept(void) 413{ 414 struct hast_resource *res; 415 struct proto_conn *conn; 416 struct nv *nvin, *nvout, *nverr; 417 const char *resname; 418 const unsigned char *token; 419 char laddr[256], raddr[256]; 420 size_t size; 421 pid_t pid; 422 int status; 423 424 proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr)); 425 pjdlog_debug(1, "Accepting connection to %s.", laddr); 426 427 if (proto_accept(cfg->hc_listenconn, &conn) < 0) { 428 pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 429 return; 430 } 431 432 proto_local_address(conn, laddr, sizeof(laddr)); 433 proto_remote_address(conn, raddr, sizeof(raddr)); 434 pjdlog_info("Connection from %s to %s.", raddr, laddr); 435 436 /* Error in setting timeout is not critical, but why should it fail? */ 437 if (proto_timeout(conn, HAST_TIMEOUT) < 0) 438 pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 439 440 nvin = nvout = nverr = NULL; 441 442 /* 443 * Before receiving any data see if remote host have access to any 444 * resource. 445 */ 446 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 447 if (proto_address_match(conn, res->hr_remoteaddr)) 448 break; 449 } 450 if (res == NULL) { 451 pjdlog_error("Client %s isn't known.", raddr); 452 goto close; 453 } 454 /* Ok, remote host can access at least one resource. */ 455 456 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 457 pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 458 raddr); 459 goto close; 460 } 461 462 resname = nv_get_string(nvin, "resource"); 463 if (resname == NULL) { 464 pjdlog_error("No 'resource' field in the header received from %s.", 465 raddr); 466 goto close; 467 } 468 pjdlog_debug(2, "%s: resource=%s", raddr, resname); 469 token = nv_get_uint8_array(nvin, &size, "token"); 470 /* 471 * NULL token means that this is first conection. 472 */ 473 if (token != NULL && size != sizeof(res->hr_token)) { 474 pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 475 raddr, sizeof(res->hr_token), size); 476 goto close; 477 } 478 479 /* 480 * From now on we want to send errors to the remote node. 481 */ 482 nverr = nv_alloc(); 483 484 /* Find resource related to this connection. */ 485 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 486 if (strcmp(resname, res->hr_name) == 0) 487 break; 488 } 489 /* Have we found the resource? */ 490 if (res == NULL) { 491 pjdlog_error("No resource '%s' as requested by %s.", 492 resname, raddr); 493 nv_add_stringf(nverr, "errmsg", "Resource not configured."); 494 goto fail; 495 } 496 497 /* Now that we know resource name setup log prefix. */ 498 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 499 500 /* Does the remote host have access to this resource? */ 501 if (!proto_address_match(conn, res->hr_remoteaddr)) { 502 pjdlog_error("Client %s has no access to the resource.", raddr); 503 nv_add_stringf(nverr, "errmsg", "No access to the resource."); 504 goto fail; 505 } 506 /* Is the resource marked as secondary? */ 507 if (res->hr_role != HAST_ROLE_SECONDARY) { 508 pjdlog_error("We act as %s for the resource and not as %s as requested by %s.", 509 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 510 raddr); 511 nv_add_stringf(nverr, "errmsg", 512 "Remote node acts as %s for the resource and not as %s.", 513 role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 514 goto fail; 515 } 516 /* Does token (if exists) match? */ 517 if (token != NULL && memcmp(token, res->hr_token, 518 sizeof(res->hr_token)) != 0) { 519 pjdlog_error("Token received from %s doesn't match.", raddr); 520 nv_add_stringf(nverr, "errmsg", "Token doesn't match."); 521 goto fail; 522 } 523 /* 524 * If there is no token, but we have half-open connection 525 * (only remotein) or full connection (worker process is running) 526 * we have to cancel those and accept the new connection. 527 */ 528 if (token == NULL) { 529 assert(res->hr_remoteout == NULL); 530 pjdlog_debug(1, "Initial connection from %s.", raddr); 531 if (res->hr_workerpid != 0) { 532 assert(res->hr_remotein == NULL); 533 pjdlog_debug(1, 534 "Worker process exists (pid=%u), stopping it.", 535 (unsigned int)res->hr_workerpid); 536 /* Stop child process. */ 537 if (kill(res->hr_workerpid, SIGINT) < 0) { 538 pjdlog_errno(LOG_ERR, 539 "Unable to stop worker process (pid=%u)", 540 (unsigned int)res->hr_workerpid); 541 /* 542 * Other than logging the problem we 543 * ignore it - nothing smart to do. 544 */ 545 } 546 /* Wait for it to exit. */ 547 else if ((pid = waitpid(res->hr_workerpid, 548 &status, 0)) != res->hr_workerpid) { 549 /* We can only log the problem. */ 550 pjdlog_errno(LOG_ERR, 551 "Waiting for worker process (pid=%u) failed", 552 (unsigned int)res->hr_workerpid); 553 } else { 554 child_exit_log(res->hr_workerpid, status); 555 } 556 res->hr_workerpid = 0; 557 } else if (res->hr_remotein != NULL) { 558 char oaddr[256]; 559 560 proto_remote_address(conn, oaddr, sizeof(oaddr)); 561 pjdlog_debug(1, 562 "Canceling half-open connection from %s on connection from %s.", 563 oaddr, raddr); 564 proto_close(res->hr_remotein); 565 res->hr_remotein = NULL; 566 } 567 } 568 569 /* 570 * Checks and cleanups are done. 571 */ 572 573 if (token == NULL) { 574 arc4random_buf(res->hr_token, sizeof(res->hr_token)); 575 nvout = nv_alloc(); 576 nv_add_uint8_array(nvout, res->hr_token, 577 sizeof(res->hr_token), "token"); 578 if (nv_error(nvout) != 0) { 579 pjdlog_common(LOG_ERR, 0, nv_error(nvout), 580 "Unable to prepare return header for %s", raddr); 581 nv_add_stringf(nverr, "errmsg", 582 "Remote node was unable to prepare return header: %s.", 583 strerror(nv_error(nvout))); 584 goto fail; 585 } 586 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 587 int error = errno; 588 589 pjdlog_errno(LOG_ERR, "Unable to send response to %s", 590 raddr); 591 nv_add_stringf(nverr, "errmsg", 592 "Remote node was unable to send response: %s.", 593 strerror(error)); 594 goto fail; 595 } 596 res->hr_remotein = conn; 597 pjdlog_debug(1, "Incoming connection from %s configured.", 598 raddr); 599 } else { 600 res->hr_remoteout = conn; 601 pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 602 hastd_secondary(res, nvin); 603 } 604 nv_free(nvin); 605 nv_free(nvout); 606 nv_free(nverr); 607 pjdlog_prefix_set("%s", ""); 608 return; 609fail: 610 if (nv_error(nverr) != 0) { 611 pjdlog_common(LOG_ERR, 0, nv_error(nverr), 612 "Unable to prepare error header for %s", raddr); 613 goto close; 614 } 615 if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 616 pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 617 goto close; 618 } 619close: 620 if (nvin != NULL) 621 nv_free(nvin); 622 if (nvout != NULL) 623 nv_free(nvout); 624 if (nverr != NULL) 625 nv_free(nverr); 626 proto_close(conn); 627 pjdlog_prefix_set("%s", ""); 628} 629 630static void 631main_loop(void) 632{ 633 struct hast_resource *res; 634 struct timeval timeout; 635 int fd, maxfd, ret; 636 fd_set rfds; 637 638 timeout.tv_sec = REPORT_INTERVAL; 639 timeout.tv_usec = 0; 640 641 for (;;) { 642 if (sigexit_received) { 643 sigexit_received = false; 644 terminate_workers(); 645 exit(EX_OK); 646 } 647 if (sigchld_received) { 648 sigchld_received = false; 649 child_exit(); 650 } 651 if (sighup_received) { 652 sighup_received = false; 653 hastd_reload(); 654 } 655 656 /* Setup descriptors for select(2). */ 657 FD_ZERO(&rfds); 658 maxfd = fd = proto_descriptor(cfg->hc_controlconn); 659 FD_SET(fd, &rfds); 660 fd = proto_descriptor(cfg->hc_listenconn); 661 FD_SET(fd, &rfds); 662 maxfd = fd > maxfd ? fd : maxfd; 663 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 664 if (res->hr_event == NULL) 665 continue; 666 fd = proto_descriptor(res->hr_event); 667 FD_SET(fd, &rfds); 668 maxfd = fd > maxfd ? fd : maxfd; 669 } 670 671 ret = select(maxfd + 1, &rfds, NULL, NULL, &timeout); 672 if (ret == 0) 673 hook_check(false); 674 else if (ret == -1) { 675 if (errno == EINTR) 676 continue; 677 KEEP_ERRNO((void)pidfile_remove(pfh)); 678 pjdlog_exit(EX_OSERR, "select() failed"); 679 } 680 681 if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) 682 control_handle(cfg); 683 if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds)) 684 listen_accept(); 685 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 686 if (res->hr_event == NULL) 687 continue; 688 if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { 689 if (event_recv(res) == 0) 690 continue; 691 /* The worker process exited? */ 692 proto_close(res->hr_event); 693 res->hr_event = NULL; 694 } 695 } 696 } 697} 698 699int 700main(int argc, char *argv[]) 701{ 702 const char *pidfile; 703 pid_t otherpid; 704 bool foreground; 705 int debuglevel; 706 707 g_gate_load(); 708 709 foreground = false; 710 debuglevel = 0; 711 pidfile = HASTD_PIDFILE; 712 713 for (;;) { 714 int ch; 715 716 ch = getopt(argc, argv, "c:dFhP:"); 717 if (ch == -1) 718 break; 719 switch (ch) { 720 case 'c': 721 cfgpath = optarg; 722 break; 723 case 'd': 724 debuglevel++; 725 break; 726 case 'F': 727 foreground = true; 728 break; 729 case 'P': 730 pidfile = optarg; 731 break; 732 case 'h': 733 default: 734 usage(); 735 } 736 } 737 argc -= optind; 738 argv += optind; 739 740 pjdlog_debug_set(debuglevel); 741 742 pfh = pidfile_open(pidfile, 0600, &otherpid); 743 if (pfh == NULL) { 744 if (errno == EEXIST) { 745 pjdlog_exitx(EX_TEMPFAIL, 746 "Another hastd is already running, pid: %jd.", 747 (intmax_t)otherpid); 748 } 749 /* If we cannot create pidfile from other reasons, only warn. */ 750 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile"); 751 } 752 753 cfg = yy_config_parse(cfgpath, true); 754 assert(cfg != NULL); 755 756 signal(SIGINT, sighandler); 757 signal(SIGTERM, sighandler); 758 signal(SIGHUP, sighandler); 759 signal(SIGCHLD, sighandler); 760 761 /* Listen on control address. */ 762 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 763 KEEP_ERRNO((void)pidfile_remove(pfh)); 764 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 765 cfg->hc_controladdr); 766 } 767 /* Listen for remote connections. */ 768 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) { 769 KEEP_ERRNO((void)pidfile_remove(pfh)); 770 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 771 cfg->hc_listenaddr); 772 } 773 774 if (!foreground) { 775 if (daemon(0, 0) < 0) { 776 KEEP_ERRNO((void)pidfile_remove(pfh)); 777 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 778 } 779 780 /* Start logging to syslog. */ 781 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 782 783 /* Write PID to a file. */ 784 if (pidfile_write(pfh) < 0) { 785 pjdlog_errno(LOG_WARNING, 786 "Unable to write PID to a file"); 787 } 788 } 789 790 hook_init(); 791 792 main_loop(); 793 794 exit(0); 795} 796