/* hastd.c revision 220899 */
11573Srgrimes/*- 21573Srgrimes * Copyright (c) 2009-2010 The FreeBSD Foundation 31573Srgrimes * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> 41573Srgrimes * All rights reserved. 51573Srgrimes * 61573Srgrimes * This software was developed by Pawel Jakub Dawidek under sponsorship from 71573Srgrimes * the FreeBSD Foundation. 8227753Stheraven * 9227753Stheraven * Redistribution and use in source and binary forms, with or without 10227753Stheraven * modification, are permitted provided that the following conditions 11227753Stheraven * are met: 12227753Stheraven * 1. Redistributions of source code must retain the above copyright 131573Srgrimes * notice, this list of conditions and the following disclaimer. 141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151573Srgrimes * notice, this list of conditions and the following disclaimer in the 161573Srgrimes * documentation and/or other materials provided with the distribution. 171573Srgrimes * 181573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 191573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 201573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 211573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 221573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 231573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 241573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 251573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 261573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 271573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 281573Srgrimes * SUCH DAMAGE. 
291573Srgrimes */ 301573Srgrimes 311573Srgrimes#include <sys/cdefs.h> 321573Srgrimes__FBSDID("$FreeBSD: head/sbin/hastd/hastd.c 220899 2011-04-20 18:49:12Z pjd $"); 331573Srgrimes 341573Srgrimes#include <sys/param.h> 351573Srgrimes#include <sys/linker.h> 361573Srgrimes#include <sys/module.h> 371573Srgrimes#include <sys/stat.h> 381573Srgrimes#include <sys/wait.h> 391573Srgrimes 401573Srgrimes#include <err.h> 4190045Sobrien#include <errno.h> 4290045Sobrien#include <libutil.h> 431573Srgrimes#include <signal.h> 441573Srgrimes#include <stdbool.h> 451573Srgrimes#include <stdio.h> 461573Srgrimes#include <stdlib.h> 471573Srgrimes#include <string.h> 481573Srgrimes#include <sysexits.h> 491573Srgrimes#include <time.h> 501573Srgrimes#include <unistd.h> 511573Srgrimes 521573Srgrimes#include <activemap.h> 531573Srgrimes#include <pjdlog.h> 541573Srgrimes 551573Srgrimes#include "control.h" 561573Srgrimes#include "event.h" 571573Srgrimes#include "hast.h" 581573Srgrimes#include "hast_proto.h" 591573Srgrimes#include "hastd.h" 601573Srgrimes#include "hooks.h" 611573Srgrimes#include "subr.h" 621573Srgrimes 631573Srgrimes/* Path to configuration file. */ 648870Srgrimesconst char *cfgpath = HAST_CONFIG; 651573Srgrimes/* Hastd configuration. */ 661573Srgrimesstatic struct hastd_config *cfg; 671573Srgrimes/* Was SIGINT or SIGTERM signal received? */ 681573Srgrimesbool sigexit_received = false; 69132817Stjr/* PID file handle. */ 70132817Stjrstruct pidfh *pfh; 71132817Stjr 72132817Stjr/* How often check for hooks running for too long. */ 73132817Stjr#define REPORT_INTERVAL 5 74304284Sache 75132817Stjrstatic void 76132817Stjrusage(void) 77132817Stjr{ 78132817Stjr 791573Srgrimes errx(EX_USAGE, "[-dFh] [-c config] [-P pidfile]"); 801573Srgrimes} 811573Srgrimes 821573Srgrimesstatic void 831573Srgrimesg_gate_load(void) 841573Srgrimes{ 851573Srgrimes 86132817Stjr if (modfind("g_gate") == -1) { 871573Srgrimes /* Not present in kernel, try loading it. 
*/ 88132817Stjr if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { 891573Srgrimes if (errno != EEXIST) { 901573Srgrimes pjdlog_exit(EX_OSERR, 911573Srgrimes "Unable to load geom_gate module"); 921573Srgrimes } 93132817Stjr } 941573Srgrimes } 9519276Sache} 9619276Sache 97243779Smarcelvoid 98243779Smarceldescriptors_cleanup(struct hast_resource *res) 99243779Smarcel{ 100243779Smarcel struct hast_resource *tres; 101243779Smarcel 102243779Smarcel TAILQ_FOREACH(tres, &cfg->hc_resources, hr_next) { 103243779Smarcel if (tres == res) { 104243779Smarcel PJDLOG_VERIFY(res->hr_role == HAST_ROLE_SECONDARY || 105243779Smarcel (res->hr_remotein == NULL && 106243779Smarcel res->hr_remoteout == NULL)); 107243779Smarcel continue; 108243779Smarcel } 109243779Smarcel if (tres->hr_remotein != NULL) 110243779Smarcel proto_close(tres->hr_remotein); 111243779Smarcel if (tres->hr_remoteout != NULL) 112243779Smarcel proto_close(tres->hr_remoteout); 113243779Smarcel if (tres->hr_ctrl != NULL) 114243779Smarcel proto_close(tres->hr_ctrl); 115243779Smarcel if (tres->hr_event != NULL) 116304284Sache proto_close(tres->hr_event); 117304284Sache if (tres->hr_conn != NULL) 118304284Sache proto_close(tres->hr_conn); 119304284Sache } 120304284Sache if (cfg->hc_controlin != NULL) 121304284Sache proto_close(cfg->hc_controlin); 122304284Sache proto_close(cfg->hc_controlconn); 123304284Sache proto_close(cfg->hc_listenconn); 124304284Sache (void)pidfile_close(pfh); 125304284Sache hook_fini(); 126304284Sache pjdlog_fini(); 127304284Sache} 128304284Sache 129304284Sachestatic const char * 1301573Srgrimesdtype2str(mode_t mode) 131132817Stjr{ 132132817Stjr 133132817Stjr if (S_ISBLK(mode)) 134132817Stjr return ("block device"); 1351573Srgrimes else if (S_ISCHR(mode)) 136132817Stjr return ("character device"); 1371573Srgrimes else if (S_ISDIR(mode)) 138132817Stjr return ("directory"); 1391573Srgrimes else if (S_ISFIFO(mode)) 140304284Sache return ("pipe or FIFO"); 141304284Sache else if (S_ISLNK(mode)) 
142304284Sache return ("symbolic link"); 143304284Sache else if (S_ISREG(mode)) 144304284Sache return ("regular file"); 145304284Sache else if (S_ISSOCK(mode)) 146304284Sache return ("socket"); 1471573Srgrimes else if (S_ISWHT(mode)) 148304284Sache return ("whiteout"); 149304284Sache else 150304284Sache return ("unknown"); 1511573Srgrimes} 15290045Sobrien 153158812Sachevoid 15490045Sobriendescriptors_assert(const struct hast_resource *res, int pjdlogmode) 15590045Sobrien{ 156180021Smtm char msg[256]; 1571573Srgrimes struct stat sb; 15890045Sobrien long maxfd; 1591573Srgrimes bool isopen; 16090045Sobrien mode_t mode; 161304284Sache int fd; 162304284Sache 163243779Smarcel /* 164243779Smarcel * At this point descriptor to syslog socket is closed, so if we want 165243779Smarcel * to log assertion message, we have to first store it in 'msg' local 166243779Smarcel * buffer and then open syslog socket and log it. 167243779Smarcel */ 168304284Sache msg[0] = '\0'; 169304284Sache 170243779Smarcel maxfd = sysconf(_SC_OPEN_MAX); 17190045Sobrien if (maxfd < 0) { 172304284Sache pjdlog_init(pjdlogmode); 173304284Sache pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 174243779Smarcel role2str(res->hr_role)); 175304284Sache pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed"); 176243779Smarcel pjdlog_fini(); 177304284Sache maxfd = 16384; 178304284Sache } 17990045Sobrien for (fd = 0; fd <= maxfd; fd++) { 180304284Sache if (fstat(fd, &sb) == 0) { 181304284Sache isopen = true; 1821573Srgrimes mode = sb.st_mode; 18390045Sobrien } else if (errno == EBADF) { 1841573Srgrimes isopen = false; 1851573Srgrimes mode = 0; 1861573Srgrimes } else { 187228754Seadler (void)snprintf(msg, sizeof(msg), 188228754Seadler "Unable to fstat descriptor %d: %s", fd, 1891573Srgrimes strerror(errno)); 190243779Smarcel break; 191159294Sdelphij } 192132817Stjr if (fd == STDIN_FILENO || fd == STDOUT_FILENO || 193132817Stjr fd == STDERR_FILENO) { 194132817Stjr if (!isopen) { 195132817Stjr (void)snprintf(msg, 
sizeof(msg), 196304284Sache "Descriptor %d (%s) is closed, but should be open.", 1971573Srgrimes fd, (fd == STDIN_FILENO ? "stdin" : 198159294Sdelphij (fd == STDOUT_FILENO ? "stdout" : "stderr"))); 1991573Srgrimes break; 2001573Srgrimes } 2011573Srgrimes } else if (fd == proto_descriptor(res->hr_event)) { 2021573Srgrimes if (!isopen) { 2031573Srgrimes (void)snprintf(msg, sizeof(msg), 2041573Srgrimes "Descriptor %d (event) is closed, but should be open.", 20580525Smikeh fd); 206243779Smarcel break; 207243779Smarcel } 208243779Smarcel if (!S_ISSOCK(mode)) { 209243779Smarcel (void)snprintf(msg, sizeof(msg), 2101573Srgrimes "Descriptor %d (event) is %s, but should be %s.", 2111573Srgrimes fd, dtype2str(mode), dtype2str(S_IFSOCK)); 2121573Srgrimes break; 2131573Srgrimes } 2141573Srgrimes } else if (fd == proto_descriptor(res->hr_ctrl)) { 21574963Speter if (!isopen) { 216304284Sache (void)snprintf(msg, sizeof(msg), 217132817Stjr "Descriptor %d (ctrl) is closed, but should be open.", 218132817Stjr fd); 219304284Sache break; 220132817Stjr } 221132817Stjr if (!S_ISSOCK(mode)) { 222304284Sache (void)snprintf(msg, sizeof(msg), 223304284Sache "Descriptor %d (ctrl) is %s, but should be %s.", 224304284Sache fd, dtype2str(mode), dtype2str(S_IFSOCK)); 225132817Stjr break; 226304284Sache } 227132817Stjr } else if (res->hr_role == HAST_ROLE_PRIMARY && 228132817Stjr fd == proto_descriptor(res->hr_conn)) { 229132817Stjr if (!isopen) { 230132817Stjr (void)snprintf(msg, sizeof(msg), 2311573Srgrimes "Descriptor %d (conn) is closed, but should be open.", 232132817Stjr fd); 233304284Sache break; 234304284Sache } 235304284Sache if (!S_ISSOCK(mode)) { 236304284Sache (void)snprintf(msg, sizeof(msg), 237132817Stjr "Descriptor %d (conn) is %s, but should be %s.", 2381573Srgrimes fd, dtype2str(mode), dtype2str(S_IFSOCK)); 239132817Stjr break; 240132817Stjr } 241132817Stjr } else if (res->hr_role == HAST_ROLE_SECONDARY && 242132817Stjr res->hr_conn != NULL && 243132817Stjr fd == 
proto_descriptor(res->hr_conn)) { 244304284Sache if (isopen) { 245304284Sache (void)snprintf(msg, sizeof(msg), 246304284Sache "Descriptor %d (conn) is open, but should be closed.", 247132817Stjr fd); 248304284Sache break; 249132817Stjr } 250132817Stjr } else if (res->hr_role == HAST_ROLE_SECONDARY && 251132817Stjr fd == proto_descriptor(res->hr_remotein)) { 2521573Srgrimes if (!isopen) { 253304284Sache (void)snprintf(msg, sizeof(msg), 254304284Sache "Descriptor %d (remote in) is closed, but should be open.", 2551573Srgrimes fd); 2561573Srgrimes break; 2571573Srgrimes } 258304284Sache if (!S_ISSOCK(mode)) { 2591573Srgrimes (void)snprintf(msg, sizeof(msg), 260304284Sache "Descriptor %d (remote in) is %s, but should be %s.", 2611573Srgrimes fd, dtype2str(mode), dtype2str(S_IFSOCK)); 2621573Srgrimes break; 263304284Sache } 264304284Sache } else if (res->hr_role == HAST_ROLE_SECONDARY && 265304284Sache fd == proto_descriptor(res->hr_remoteout)) { 266304284Sache if (!isopen) { 267304284Sache (void)snprintf(msg, sizeof(msg), 268304284Sache "Descriptor %d (remote out) is closed, but should be open.", 269304284Sache fd); 270304284Sache break; 271304284Sache } 272304284Sache if (!S_ISSOCK(mode)) { 273304284Sache (void)snprintf(msg, sizeof(msg), 274304284Sache "Descriptor %d (remote out) is %s, but should be %s.", 275304284Sache fd, dtype2str(mode), dtype2str(S_IFSOCK)); 276304284Sache break; 277304284Sache } 278304284Sache } else { 279304284Sache if (isopen) { 280304284Sache (void)snprintf(msg, sizeof(msg), 281304284Sache "Descriptor %d is open (%s), but should be closed.", 282304284Sache fd, dtype2str(mode)); 283304284Sache break; 284304284Sache } 285304284Sache } 286304284Sache } 2871573Srgrimes if (msg[0] != '\0') { 2881573Srgrimes pjdlog_init(pjdlogmode); 2891573Srgrimes pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 2901573Srgrimes role2str(res->hr_role)); 2911573Srgrimes PJDLOG_ABORT("%s", msg); 29274963Speter } 293243779Smarcel} 2941573Srgrimes 295304284Sachestatic 
void 2961573Srgrimeschild_exit_log(unsigned int pid, int status) 297304284Sache{ 298304284Sache 299304284Sache if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 300304284Sache pjdlog_debug(1, "Worker process exited gracefully (pid=%u).", 301304284Sache pid); 302304284Sache } else if (WIFSIGNALED(status)) { 303304284Sache pjdlog_error("Worker process killed (pid=%u, signal=%d).", 304243779Smarcel pid, WTERMSIG(status)); 305243779Smarcel } else { 306304284Sache pjdlog_error("Worker process exited ungracefully (pid=%u, exitcode=%d).", 3071573Srgrimes pid, WIFEXITED(status) ? WEXITSTATUS(status) : -1); 3081573Srgrimes } 3091573Srgrimes} 3101573Srgrimes 3111573Srgrimesstatic void 3121573Srgrimeschild_exit(void) 3131573Srgrimes{ 3141573Srgrimes struct hast_resource *res; 31574963Speter int status; 316304284Sache pid_t pid; 317243779Smarcel 3181573Srgrimes while ((pid = wait3(&status, WNOHANG, NULL)) > 0) { 319304284Sache /* Find resource related to the process that just exited. */ 3201573Srgrimes TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 321150137Sache if (pid == res->hr_workerpid) 32274963Speter break; 3231573Srgrimes } 3241573Srgrimes if (res == NULL) { 3251573Srgrimes /* 3261573Srgrimes * This can happen when new connection arrives and we 32774963Speter * cancel child responsible for the old one or if this 3281573Srgrimes * was hook which we executed. 3291573Srgrimes */ 3301573Srgrimes hook_check_one(pid, status); 331304284Sache continue; 3321573Srgrimes } 3331573Srgrimes pjdlog_prefix_set("[%s] (%s) ", res->hr_name, 3341573Srgrimes role2str(res->hr_role)); 3351573Srgrimes child_exit_log(pid, status); 3361573Srgrimes child_cleanup(res); 3378870Srgrimes if (res->hr_role == HAST_ROLE_PRIMARY) { 3381573Srgrimes /* 3391573Srgrimes * Restart child process if it was killed by signal 3401573Srgrimes * or exited because of temporary problem. 
3411573Srgrimes */ 3421573Srgrimes if (WIFSIGNALED(status) || 3431573Srgrimes (WIFEXITED(status) && 3441573Srgrimes WEXITSTATUS(status) == EX_TEMPFAIL)) { 3451573Srgrimes sleep(1); 3461573Srgrimes pjdlog_info("Restarting worker process."); 3471573Srgrimes hastd_primary(res); 3481573Srgrimes } else { 3491573Srgrimes res->hr_role = HAST_ROLE_INIT; 3501573Srgrimes pjdlog_info("Changing resource role back to %s.", 3511573Srgrimes role2str(res->hr_role)); 3521573Srgrimes } 353304284Sache } 354304284Sache pjdlog_prefix_set("%s", ""); 3551573Srgrimes } 3561573Srgrimes} 3571573Srgrimes 3581573Srgrimesstatic bool 3591573Srgrimesresource_needs_restart(const struct hast_resource *res0, 360150137Sache const struct hast_resource *res1) 3611573Srgrimes{ 3621573Srgrimes 3638870Srgrimes PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 3641573Srgrimes 3651573Srgrimes if (strcmp(res0->hr_provname, res1->hr_provname) != 0) 3661573Srgrimes return (true); 367150137Sache if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0) 3681573Srgrimes return (true); 3691573Srgrimes if (res0->hr_role == HAST_ROLE_INIT || 3701573Srgrimes res0->hr_role == HAST_ROLE_SECONDARY) { 3711573Srgrimes if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 3721573Srgrimes return (true); 3731573Srgrimes if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 3741573Srgrimes return (true); 3751573Srgrimes if (res0->hr_replication != res1->hr_replication) 3761573Srgrimes return (true); 3771573Srgrimes if (res0->hr_checksum != res1->hr_checksum) 3781573Srgrimes return (true); 3791573Srgrimes if (res0->hr_compression != res1->hr_compression) 3801573Srgrimes return (true); 3811573Srgrimes if (res0->hr_timeout != res1->hr_timeout) 3821573Srgrimes return (true); 3831573Srgrimes if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 3841573Srgrimes return (true); 3851573Srgrimes } 3861573Srgrimes return (false); 3871573Srgrimes} 3888870Srgrimes 3891573Srgrimesstatic bool 
3901573Srgrimesresource_needs_reload(const struct hast_resource *res0, 3911573Srgrimes const struct hast_resource *res1) 3921573Srgrimes{ 3931573Srgrimes 3941573Srgrimes PJDLOG_ASSERT(strcmp(res0->hr_name, res1->hr_name) == 0); 3951573Srgrimes PJDLOG_ASSERT(strcmp(res0->hr_provname, res1->hr_provname) == 0); 3961573Srgrimes PJDLOG_ASSERT(strcmp(res0->hr_localpath, res1->hr_localpath) == 0); 3971573Srgrimes 3981573Srgrimes if (res0->hr_role != HAST_ROLE_PRIMARY) 399304284Sache return (false); 400304284Sache 401304284Sache if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0) 4021573Srgrimes return (true); 4031573Srgrimes if (strcmp(res0->hr_sourceaddr, res1->hr_sourceaddr) != 0) 4041573Srgrimes return (true); 4051573Srgrimes if (res0->hr_replication != res1->hr_replication) 4061573Srgrimes return (true); 4071573Srgrimes if (res0->hr_checksum != res1->hr_checksum) 4081573Srgrimes return (true); 4091573Srgrimes if (res0->hr_compression != res1->hr_compression) 4101573Srgrimes return (true); 411228755Seadler if (res0->hr_timeout != res1->hr_timeout) 4121573Srgrimes return (true); 4131573Srgrimes if (strcmp(res0->hr_exec, res1->hr_exec) != 0) 4141573Srgrimes return (true); 4151573Srgrimes return (false); 4161573Srgrimes} 4171573Srgrimes 4181573Srgrimesstatic void 4191573Srgrimesresource_reload(const struct hast_resource *res) 420159294Sdelphij{ 4211573Srgrimes struct nv *nvin, *nvout; 4221573Srgrimes int error; 423304284Sache 4241573Srgrimes PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 42524158Simp 426304284Sache nvout = nv_alloc(); 427304284Sache nv_add_uint8(nvout, HASTCTL_RELOAD, "cmd"); 428304284Sache nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr"); 429304284Sache nv_add_string(nvout, res->hr_sourceaddr, "sourceaddr"); 430304284Sache nv_add_int32(nvout, (int32_t)res->hr_replication, "replication"); 431304284Sache nv_add_int32(nvout, (int32_t)res->hr_checksum, "checksum"); 4321573Srgrimes nv_add_int32(nvout, (int32_t)res->hr_compression, 
"compression"); 4331573Srgrimes nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout"); 434228755Seadler nv_add_string(nvout, res->hr_exec, "exec"); 4351573Srgrimes if (nv_error(nvout) != 0) { 43624158Simp nv_free(nvout); 43724158Simp pjdlog_error("Unable to allocate header for reload message."); 43824158Simp return; 43924158Simp } 440304284Sache if (hast_proto_send(res, res->hr_ctrl, nvout, NULL, 0) < 0) { 441304284Sache pjdlog_errno(LOG_ERR, "Unable to send reload message"); 4421573Srgrimes nv_free(nvout); 4431573Srgrimes return; 444304284Sache } 445304284Sache nv_free(nvout); 4461573Srgrimes 447304284Sache /* Receive response. */ 448304284Sache if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 449304284Sache pjdlog_errno(LOG_ERR, "Unable to receive reload reply"); 450304284Sache return; 4518870Srgrimes } 45228820Simp error = nv_get_int16(nvin, "error"); 45328820Simp nv_free(nvin); 45428820Simp if (error != 0) { 4551573Srgrimes pjdlog_common(LOG_ERR, 0, error, "Reload failed"); 456121667Stjr return; 45733664Sjb } 45828836Sache} 45928836Sache 46028836Sachestatic void 46128836Sachehastd_reload(void) 46228836Sache{ 463228755Seadler struct hastd_config *newcfg; 4641573Srgrimes struct hast_resource *nres, *cres, *tres; 4651573Srgrimes uint8_t role; 4661573Srgrimes 4671573Srgrimes pjdlog_info("Reloading configuration..."); 4681573Srgrimes 4691573Srgrimes newcfg = yy_config_parse(cfgpath, false); 470304284Sache if (newcfg == NULL) 471304284Sache goto failed; 472304284Sache 473228755Seadler /* 4741573Srgrimes * Check if control address has changed. 
4751573Srgrimes */ 4761573Srgrimes if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) { 4771573Srgrimes if (proto_server(newcfg->hc_controladdr, 4781573Srgrimes &newcfg->hc_controlconn) < 0) { 479304284Sache pjdlog_errno(LOG_ERR, 480304284Sache "Unable to listen on control address %s", 481304284Sache newcfg->hc_controladdr); 482304284Sache goto failed; 483304284Sache } 484304284Sache } 485304284Sache /* 486304284Sache * Check if listen address has changed. 487304284Sache */ 488304284Sache if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) { 489304284Sache if (proto_server(newcfg->hc_listenaddr, 490304284Sache &newcfg->hc_listenconn) < 0) { 491304284Sache pjdlog_errno(LOG_ERR, "Unable to listen on address %s", 492304284Sache newcfg->hc_listenaddr); 493304284Sache goto failed; 494304284Sache } 495304284Sache } 496304284Sache /* 497304284Sache * Only when both control and listen sockets are successfully 498304284Sache * initialized switch them to new configuration. 499304284Sache */ 500304284Sache if (newcfg->hc_controlconn != NULL) { 501304284Sache pjdlog_info("Control socket changed from %s to %s.", 502304284Sache cfg->hc_controladdr, newcfg->hc_controladdr); 5031573Srgrimes proto_close(cfg->hc_controlconn); 504304284Sache cfg->hc_controlconn = newcfg->hc_controlconn; 505304284Sache newcfg->hc_controlconn = NULL; 5068870Srgrimes strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr, 5071573Srgrimes sizeof(cfg->hc_controladdr)); 508304284Sache } 509304284Sache if (newcfg->hc_listenconn != NULL) { 510304284Sache pjdlog_info("Listen socket changed from %s to %s.", 511304284Sache cfg->hc_listenaddr, newcfg->hc_listenaddr); 512304284Sache proto_close(cfg->hc_listenconn); 513304284Sache cfg->hc_listenconn = newcfg->hc_listenconn; 514304284Sache newcfg->hc_listenconn = NULL; 515304284Sache strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr, 516304284Sache sizeof(cfg->hc_listenaddr)); 517304284Sache } 518304284Sache 519304284Sache /* 5201573Srgrimes * Stop 
and remove resources that were removed from the configuration. 521228755Seadler */ 5221573Srgrimes TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) { 5231573Srgrimes TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) { 5248870Srgrimes if (strcmp(cres->hr_name, nres->hr_name) == 0) 5251573Srgrimes break; 5261573Srgrimes } 5271573Srgrimes if (nres == NULL) { 5281573Srgrimes control_set_role(cres, HAST_ROLE_INIT); 529100217Smikeh TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 5301573Srgrimes pjdlog_info("Resource %s removed.", cres->hr_name); 5311573Srgrimes free(cres); 532304284Sache } 533304284Sache } 5341573Srgrimes /* 535207981Sgordon * Move new resources to the current configuration. 536158812Sache */ 537207981Sgordon TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 5381573Srgrimes TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 53974963Speter if (strcmp(cres->hr_name, nres->hr_name) == 0) 540304284Sache break; 541304284Sache } 542304284Sache if (cres == NULL) { 543304284Sache TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 5441573Srgrimes TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 5451573Srgrimes pjdlog_info("Resource %s added.", nres->hr_name); 5461573Srgrimes } 5471573Srgrimes } 5481573Srgrimes /* 5491573Srgrimes * Deal with modified resources. 5501573Srgrimes * Depending on what has changed exactly we might want to perform 5511573Srgrimes * different actions. 5521573Srgrimes * 5531573Srgrimes * We do full resource restart in the following situations: 5541573Srgrimes * Resource role is INIT or SECONDARY. 555180021Smtm * Resource role is PRIMARY and path to local component or provider 5561573Srgrimes * name has changed. 5571573Srgrimes * In case of PRIMARY, the worker process will be killed and restarted, 5581573Srgrimes * which also means removing /dev/hast/<name> provider and 5591573Srgrimes * recreating it. 
5601573Srgrimes * 5611573Srgrimes * We do just reload (send SIGHUP to worker process) if we act as 5621573Srgrimes * PRIMARY, but only if remote address, replication mode, timeout or 5631573Srgrimes * execution path has changed. For those, there is no need to restart 5641573Srgrimes * worker process. 5651573Srgrimes * If PRIMARY receives SIGHUP, it will reconnect if remote address or 5661573Srgrimes * replication mode has changed or simply set new timeout if only 5671573Srgrimes * timeout has changed. 5681573Srgrimes */ 5691573Srgrimes TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { 5701573Srgrimes TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { 5711573Srgrimes if (strcmp(cres->hr_name, nres->hr_name) == 0) 5721573Srgrimes break; 5731573Srgrimes } 5741573Srgrimes PJDLOG_ASSERT(cres != NULL); 5751573Srgrimes if (resource_needs_restart(cres, nres)) { 5761573Srgrimes pjdlog_info("Resource %s configuration was modified, restarting it.", 5771573Srgrimes cres->hr_name); 5781573Srgrimes role = cres->hr_role; 5791573Srgrimes control_set_role(cres, HAST_ROLE_INIT); 5801573Srgrimes TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next); 5811573Srgrimes free(cres); 5821573Srgrimes TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next); 5838870Srgrimes TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next); 5841573Srgrimes control_set_role(nres, role); 5851573Srgrimes } else if (resource_needs_reload(cres, nres)) { 5861573Srgrimes pjdlog_info("Resource %s configuration was modified, reloading it.", 5871573Srgrimes cres->hr_name); 5881573Srgrimes strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr, 5891573Srgrimes sizeof(cres->hr_remoteaddr)); 5901573Srgrimes strlcpy(cres->hr_sourceaddr, nres->hr_sourceaddr, 5911573Srgrimes sizeof(cres->hr_sourceaddr)); 5921573Srgrimes cres->hr_replication = nres->hr_replication; 5931573Srgrimes cres->hr_checksum = nres->hr_checksum; 5941573Srgrimes cres->hr_compression = nres->hr_compression; 5951573Srgrimes cres->hr_timeout = 
nres->hr_timeout; 5961573Srgrimes strlcpy(cres->hr_exec, nres->hr_exec, 5971573Srgrimes sizeof(cres->hr_exec)); 5981573Srgrimes if (cres->hr_workerpid != 0) 59974469Sjlemon resource_reload(cres); 6001573Srgrimes } 6011573Srgrimes } 602304284Sache 603304284Sache yy_config_free(newcfg); 604304284Sache pjdlog_info("Configuration reloaded successfully."); 605304284Sache return; 606304284Sachefailed: 607304284Sache if (newcfg != NULL) { 608304284Sache if (newcfg->hc_controlconn != NULL) 609304284Sache proto_close(newcfg->hc_controlconn); 610304284Sache if (newcfg->hc_listenconn != NULL) 611304284Sache proto_close(newcfg->hc_listenconn); 612304284Sache yy_config_free(newcfg); 613304284Sache } 614100217Smikeh pjdlog_warning("Configuration not reloaded."); 6151573Srgrimes} 6161573Srgrimes 617304284Sachestatic void 618228755Seadlerterminate_workers(void) 6191573Srgrimes{ 6201573Srgrimes struct hast_resource *res; 6211573Srgrimes 622159294Sdelphij pjdlog_info("Termination signal received, exiting."); 6231573Srgrimes TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 624304284Sache if (res->hr_workerpid == 0) 6251573Srgrimes continue; 6261573Srgrimes pjdlog_info("Terminating worker process (resource=%s, role=%s, pid=%u).", 6271573Srgrimes res->hr_name, role2str(res->hr_role), res->hr_workerpid); 628243779Smarcel if (kill(res->hr_workerpid, SIGTERM) == 0) 6291573Srgrimes continue; 63074963Speter pjdlog_errno(LOG_WARNING, 6311573Srgrimes "Unable to send signal to worker process (resource=%s, role=%s, pid=%u).", 6321573Srgrimes res->hr_name, role2str(res->hr_role), res->hr_workerpid); 6331573Srgrimes } 634228755Seadler} 635228755Seadler 63674963Speterstatic void 6371573Srgrimeslisten_accept(void) 6381573Srgrimes{ 6391573Srgrimes struct hast_resource *res; 6401573Srgrimes struct proto_conn *conn; 6411573Srgrimes struct nv *nvin, *nvout, *nverr; 6421573Srgrimes const char *resname; 6431573Srgrimes const unsigned char *token; 6441573Srgrimes char laddr[256], raddr[256]; 
645159294Sdelphij size_t size; 646243779Smarcel pid_t pid; 6471573Srgrimes int status; 6481573Srgrimes 6491573Srgrimes proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr)); 6501573Srgrimes pjdlog_debug(1, "Accepting connection to %s.", laddr); 6511573Srgrimes 6521573Srgrimes if (proto_accept(cfg->hc_listenconn, &conn) < 0) { 6531573Srgrimes pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr); 6541573Srgrimes return; 6551573Srgrimes } 6561573Srgrimes 6571573Srgrimes proto_local_address(conn, laddr, sizeof(laddr)); 6581573Srgrimes proto_remote_address(conn, raddr, sizeof(raddr)); 6591573Srgrimes pjdlog_info("Connection from %s to %s.", raddr, laddr); 660228755Seadler 6618870Srgrimes /* Error in setting timeout is not critical, but why should it fail? */ 662243779Smarcel if (proto_timeout(conn, HAST_TIMEOUT) < 0) 663243779Smarcel pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 664304284Sache 665243779Smarcel nvin = nvout = nverr = NULL; 666243779Smarcel 667304284Sache /* 668304284Sache * Before receiving any data see if remote host have access to any 669304284Sache * resource. 670304284Sache */ 671304284Sache TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 6721573Srgrimes if (proto_address_match(conn, res->hr_remoteaddr)) 673304284Sache break; 674304284Sache } 675304284Sache if (res == NULL) { 676304284Sache pjdlog_error("Client %s isn't known.", raddr); 6771573Srgrimes goto close; 6781573Srgrimes } 6791573Srgrimes /* Ok, remote host can access at least one resource. 
*/ 6801573Srgrimes 681304284Sache if (hast_proto_recv_hdr(conn, &nvin) < 0) { 6821573Srgrimes pjdlog_errno(LOG_ERR, "Unable to receive header from %s", 6831573Srgrimes raddr); 6841573Srgrimes goto close; 6851573Srgrimes } 6861573Srgrimes 687304284Sache resname = nv_get_string(nvin, "resource"); 6881573Srgrimes if (resname == NULL) { 6891573Srgrimes pjdlog_error("No 'resource' field in the header received from %s.", 690304284Sache raddr); 691304284Sache goto close; 692304284Sache } 693304284Sache pjdlog_debug(2, "%s: resource=%s", raddr, resname); 6941573Srgrimes token = nv_get_uint8_array(nvin, &size, "token"); 6951573Srgrimes /* 6961573Srgrimes * NULL token means that this is first conection. 6971573Srgrimes */ 6981573Srgrimes if (token != NULL && size != sizeof(res->hr_token)) { 6991573Srgrimes pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).", 700304284Sache raddr, sizeof(res->hr_token), size); 701304284Sache goto close; 702304284Sache } 703304284Sache 704304284Sache /* 7051573Srgrimes * From now on we want to send errors to the remote node. 70674963Speter */ 7071573Srgrimes nverr = nv_alloc(); 708228755Seadler 709228755Seadler /* Find resource related to this connection. */ 7101573Srgrimes TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 7111573Srgrimes if (strcmp(resname, res->hr_name) == 0) 7121573Srgrimes break; 7131573Srgrimes } 7141573Srgrimes /* Have we found the resource? */ 715159294Sdelphij if (res == NULL) { 716159294Sdelphij pjdlog_error("No resource '%s' as requested by %s.", 717243779Smarcel resname, raddr); 7181573Srgrimes nv_add_stringf(nverr, "errmsg", "Resource not configured."); 71990045Sobrien goto fail; 7201573Srgrimes } 721304284Sache 722304284Sache /* Now that we know resource name setup log prefix. */ 7231573Srgrimes pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 724288098Srodrigc 7251573Srgrimes /* Does the remote host have access to this resource? 
*/ 726304284Sache if (!proto_address_match(conn, res->hr_remoteaddr)) { 727304284Sache pjdlog_error("Client %s has no access to the resource.", raddr); 728304284Sache nv_add_stringf(nverr, "errmsg", "No access to the resource."); 729304284Sache goto fail; 7301573Srgrimes } 731304284Sache /* Is the resource marked as secondary? */ 732304284Sache if (res->hr_role != HAST_ROLE_SECONDARY) { 733304284Sache pjdlog_warning("We act as %s for the resource and not as %s as requested by %s.", 734304284Sache role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY), 735304284Sache raddr); 736304284Sache nv_add_stringf(nverr, "errmsg", 737304284Sache "Remote node acts as %s for the resource and not as %s.", 7381573Srgrimes role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); 7391573Srgrimes if (res->hr_role == HAST_ROLE_PRIMARY) { 740304284Sache /* 741304284Sache * If we act as primary request the other side to wait 742304284Sache * for us a bit, as we might be finishing cleanups. 743304284Sache */ 744304284Sache nv_add_uint8(nverr, 1, "wait"); 745304284Sache } 7461573Srgrimes goto fail; 7471573Srgrimes } 7481573Srgrimes /* Does token (if exists) match? */ 7491573Srgrimes if (token != NULL && memcmp(token, res->hr_token, 750288098Srodrigc sizeof(res->hr_token)) != 0) { 7511573Srgrimes pjdlog_error("Token received from %s doesn't match.", raddr); 752288098Srodrigc nv_add_stringf(nverr, "errmsg", "Token doesn't match."); 7531573Srgrimes goto fail; 7541573Srgrimes } 755288098Srodrigc /* 756304284Sache * If there is no token, but we have half-open connection 757288098Srodrigc * (only remotein) or full connection (worker process is running) 758288098Srodrigc * we have to cancel those and accept the new connection. 
759159294Sdelphij */ 76090045Sobrien if (token == NULL) { 761132817Stjr PJDLOG_ASSERT(res->hr_remoteout == NULL); 762132817Stjr pjdlog_debug(1, "Initial connection from %s.", raddr); 763132817Stjr if (res->hr_workerpid != 0) { 7641573Srgrimes PJDLOG_ASSERT(res->hr_remotein == NULL); 765243779Smarcel pjdlog_debug(1, 766243779Smarcel "Worker process exists (pid=%u), stopping it.", 767304284Sache (unsigned int)res->hr_workerpid); 768304284Sache /* Stop child process. */ 769243779Smarcel if (kill(res->hr_workerpid, SIGINT) < 0) { 770243779Smarcel pjdlog_errno(LOG_ERR, 771243779Smarcel "Unable to stop worker process (pid=%u)", 7721573Srgrimes (unsigned int)res->hr_workerpid); 773304284Sache /* 774304284Sache * Other than logging the problem we 7751573Srgrimes * ignore it - nothing smart to do. 776304284Sache */ 777132817Stjr } 77874963Speter /* Wait for it to exit. */ 779159294Sdelphij else if ((pid = waitpid(res->hr_workerpid, 780304284Sache &status, 0)) != res->hr_workerpid) { 781304284Sache /* We can only log the problem. */ 782132817Stjr pjdlog_errno(LOG_ERR, 783132817Stjr "Waiting for worker process (pid=%u) failed", 784304284Sache (unsigned int)res->hr_workerpid); 785304284Sache } else { 786132817Stjr child_exit_log(res->hr_workerpid, status); 787132817Stjr } 788132817Stjr child_cleanup(res); 789304284Sache } else if (res->hr_remotein != NULL) { 790304284Sache char oaddr[256]; 791132817Stjr 792304284Sache proto_remote_address(res->hr_remotein, oaddr, 793132817Stjr sizeof(oaddr)); 794132817Stjr pjdlog_debug(1, 795304284Sache "Canceling half-open connection from %s on connection from %s.", 796304284Sache oaddr, raddr); 797304284Sache proto_close(res->hr_remotein); 798304284Sache res->hr_remotein = NULL; 799304284Sache } 800304284Sache } 8011573Srgrimes 802304284Sache /* 8031573Srgrimes * Checks and cleanups are done. 
8041573Srgrimes */ 805304284Sache 806304284Sache if (token == NULL) { 80774963Speter arc4random_buf(res->hr_token, sizeof(res->hr_token)); 80874963Speter nvout = nv_alloc(); 8091573Srgrimes nv_add_uint8_array(nvout, res->hr_token, 8101573Srgrimes sizeof(res->hr_token), "token"); 811304284Sache if (nv_error(nvout) != 0) { 8121573Srgrimes pjdlog_common(LOG_ERR, 0, nv_error(nvout), 8131573Srgrimes "Unable to prepare return header for %s", raddr); 814304284Sache nv_add_stringf(nverr, "errmsg", 8151573Srgrimes "Remote node was unable to prepare return header: %s.", 8161573Srgrimes strerror(nv_error(nvout))); 8171573Srgrimes goto fail; 8181573Srgrimes } 819304284Sache if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) { 820304284Sache int error = errno; 821304284Sache 822304284Sache pjdlog_errno(LOG_ERR, "Unable to send response to %s", 823304284Sache raddr); 824304284Sache nv_add_stringf(nverr, "errmsg", 825304284Sache "Remote node was unable to send response: %s.", 826304284Sache strerror(error)); 827304284Sache goto fail; 828304284Sache } 829304284Sache res->hr_remotein = conn; 830304284Sache pjdlog_debug(1, "Incoming connection from %s configured.", 8311573Srgrimes raddr); 8321573Srgrimes } else { 8331573Srgrimes res->hr_remoteout = conn; 8341573Srgrimes pjdlog_debug(1, "Outgoing connection to %s configured.", raddr); 835249381Semaste hastd_secondary(res, nvin); 8361573Srgrimes } 8371573Srgrimes nv_free(nvin); 8381573Srgrimes nv_free(nvout); 8391573Srgrimes nv_free(nverr); 8401573Srgrimes pjdlog_prefix_set("%s", ""); 8411573Srgrimes return; 8421573Srgrimesfail: 8431573Srgrimes if (nv_error(nverr) != 0) { 8441573Srgrimes pjdlog_common(LOG_ERR, 0, nv_error(nverr), 8451573Srgrimes "Unable to prepare error header for %s", raddr); 8461573Srgrimes goto close; 8471573Srgrimes } 8481573Srgrimes if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) { 849304284Sache pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr); 850304284Sache goto close; 8511573Srgrimes } 
85290045Sobrienclose: 853316613Spfg if (nvin != NULL) 8541573Srgrimes nv_free(nvin); 8551573Srgrimes if (nvout != NULL) 8561573Srgrimes nv_free(nvout); 857243779Smarcel if (nverr != NULL) 858243779Smarcel nv_free(nverr); 859304284Sache proto_close(conn); 86080525Smikeh pjdlog_prefix_set("%s", ""); 86180525Smikeh} 86274307Sjlemon 863316613Spfgstatic void 864316613Spfgconnection_migrate(struct hast_resource *res) 865316613Spfg{ 866243758Smarcel struct proto_conn *conn; 867228755Seadler int16_t val = 0; 8681573Srgrimes 8691573Srgrimes pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 8701573Srgrimes 8711573Srgrimes PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 872158812Sache 8731573Srgrimes if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { 8741573Srgrimes pjdlog_errno(LOG_WARNING, 8751573Srgrimes "Unable to receive connection command"); 8761573Srgrimes return; 877304284Sache } 878304284Sache if (proto_client(res->hr_sourceaddr[0] != '\0' ? res->hr_sourceaddr : NULL, 879304284Sache res->hr_remoteaddr, &conn) < 0) { 880304284Sache val = errno; 881304284Sache pjdlog_errno(LOG_WARNING, 882304284Sache "Unable to create outgoing connection to %s", 883304284Sache res->hr_remoteaddr); 884304284Sache goto out; 885304284Sache } 886304284Sache if (proto_connect(conn, -1) < 0) { 887304284Sache val = errno; 888304284Sache pjdlog_errno(LOG_WARNING, "Unable to connect to %s", 889304284Sache res->hr_remoteaddr); 890243779Smarcel proto_close(conn); 891304284Sache goto out; 892304284Sache } 893304284Sache val = 0; 894304284Sacheout: 89574918Speter if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { 896304284Sache pjdlog_errno(LOG_WARNING, 89774918Speter "Unable to send reply to connection request"); 89874918Speter } 8991573Srgrimes if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0) 9001573Srgrimes pjdlog_errno(LOG_WARNING, "Unable to send connection"); 9011573Srgrimes 902228755Seadler pjdlog_prefix_set("%s", ""); 9031573Srgrimes} 
9041573Srgrimes 9051573Srgrimesstatic void 9061573Srgrimescheck_signals(void) 9071573Srgrimes{ 9081573Srgrimes struct timespec sigtimeout; 9091573Srgrimes sigset_t mask; 910159294Sdelphij int signo; 9111573Srgrimes 9121573Srgrimes sigtimeout.tv_sec = 0; 9131573Srgrimes sigtimeout.tv_nsec = 0; 914227753Stheraven 915227753Stheraven PJDLOG_VERIFY(sigemptyset(&mask) == 0); 9161573Srgrimes PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 9171573Srgrimes PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 9181573Srgrimes PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 9191573Srgrimes PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 9201573Srgrimes 9211573Srgrimes while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) { 922228755Seadler switch (signo) { 9238870Srgrimes case SIGINT: 9241573Srgrimes case SIGTERM: 925228755Seadler sigexit_received = true; 9261573Srgrimes terminate_workers(); 927228755Seadler proto_close(cfg->hc_controlconn); 9281573Srgrimes exit(EX_OK); 9291573Srgrimes break; 930228755Seadler case SIGCHLD: 9311573Srgrimes child_exit(); 9321573Srgrimes break; 9331573Srgrimes case SIGHUP: 9341573Srgrimes hastd_reload(); 935228755Seadler break; 936304284Sache default: 9371573Srgrimes PJDLOG_ABORT("Unexpected signal (%d).", signo); 9381573Srgrimes } 9391573Srgrimes } 940227753Stheraven} 941304284Sache 942304284Sachestatic void 943304284Sachemain_loop(void) 944304284Sache{ 945304284Sache struct hast_resource *res; 946304284Sache struct timeval seltimeout; 9471573Srgrimes int fd, maxfd, ret; 9481573Srgrimes time_t lastcheck, now; 9491573Srgrimes fd_set rfds; 9501573Srgrimes 9511573Srgrimes lastcheck = time(NULL); 952228755Seadler seltimeout.tv_sec = REPORT_INTERVAL; 9531573Srgrimes seltimeout.tv_usec = 0; 9541573Srgrimes 9551573Srgrimes pjdlog_info("Started successfully, running protocol version %d.", 956228755Seadler HAST_PROTO_VERSION); 9571573Srgrimes 9581573Srgrimes for (;;) { 9591573Srgrimes check_signals(); 960228755Seadler 9611573Srgrimes /* Setup descriptors 
for select(2). */ 9621573Srgrimes FD_ZERO(&rfds); 9631573Srgrimes maxfd = fd = proto_descriptor(cfg->hc_controlconn); 9641573Srgrimes PJDLOG_ASSERT(fd >= 0); 965159294Sdelphij FD_SET(fd, &rfds); 9661573Srgrimes fd = proto_descriptor(cfg->hc_listenconn); 967158812Sache PJDLOG_ASSERT(fd >= 0); 96890045Sobrien FD_SET(fd, &rfds); 9691573Srgrimes maxfd = fd > maxfd ? fd : maxfd; 9701573Srgrimes TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 9711573Srgrimes if (res->hr_event == NULL) 9721573Srgrimes continue; 9731573Srgrimes fd = proto_descriptor(res->hr_event); 9741573Srgrimes PJDLOG_ASSERT(fd >= 0); 9751573Srgrimes FD_SET(fd, &rfds); 97674918Speter maxfd = fd > maxfd ? fd : maxfd; 9771573Srgrimes if (res->hr_role == HAST_ROLE_PRIMARY) { 9781573Srgrimes /* Only primary workers asks for connections. */ 9791573Srgrimes PJDLOG_ASSERT(res->hr_conn != NULL); 9801573Srgrimes fd = proto_descriptor(res->hr_conn); 981159294Sdelphij PJDLOG_ASSERT(fd >= 0); 9821573Srgrimes FD_SET(fd, &rfds); 983304284Sache maxfd = fd > maxfd ? fd : maxfd; 9841573Srgrimes } else { 985304284Sache PJDLOG_ASSERT(res->hr_conn == NULL); 9861573Srgrimes } 98774918Speter } 988304284Sache 989304284Sache PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE); 99074918Speter ret = select(maxfd + 1, &rfds, NULL, NULL, &seltimeout); 991304284Sache now = time(NULL); 99274918Speter if (lastcheck + REPORT_INTERVAL <= now) { 9931573Srgrimes hook_check(); 9941573Srgrimes lastcheck = now; 995228755Seadler } 9961573Srgrimes if (ret == 0) { 997228755Seadler /* 9981573Srgrimes * select(2) timed out, so there should be no 9991573Srgrimes * descriptors to check. 
10001573Srgrimes */ 1001159294Sdelphij continue; 10021573Srgrimes } else if (ret == -1) { 1003304284Sache if (errno == EINTR) 10041573Srgrimes continue; 100574921Speter KEEP_ERRNO((void)pidfile_remove(pfh)); 100674918Speter pjdlog_exit(EX_OSERR, "select() failed"); 100774918Speter } 100874918Speter 10091573Srgrimes /* 10101573Srgrimes * Check for signals before we do anything to update our 1011228755Seadler * info about terminated workers in the meantime. 10121573Srgrimes */ 10131573Srgrimes check_signals(); 10141573Srgrimes 1015159294Sdelphij if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) 10161573Srgrimes control_handle(cfg); 1017304284Sache if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds)) 10181573Srgrimes listen_accept(); 101974921Speter TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 102074918Speter if (res->hr_event == NULL) 102174918Speter continue; 102274918Speter if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { 10231573Srgrimes if (event_recv(res) == 0) 1024228755Seadler continue; 1025228755Seadler /* The worker process exited? */ 10261573Srgrimes proto_close(res->hr_event); 10271573Srgrimes res->hr_event = NULL; 1028180021Smtm if (res->hr_conn != NULL) { 1029180021Smtm proto_close(res->hr_conn); 10301573Srgrimes res->hr_conn = NULL; 1031159294Sdelphij } 10321573Srgrimes continue; 10331573Srgrimes } 10341573Srgrimes if (res->hr_role == HAST_ROLE_PRIMARY) { 10351573Srgrimes PJDLOG_ASSERT(res->hr_conn != NULL); 10361573Srgrimes if (FD_ISSET(proto_descriptor(res->hr_conn), 10371573Srgrimes &rfds)) { 10381573Srgrimes connection_migrate(res); 103974918Speter } 1040159294Sdelphij } else { 10411573Srgrimes PJDLOG_ASSERT(res->hr_conn == NULL); 1042132817Stjr } 1043132817Stjr } 10441573Srgrimes } 1045132817Stjr} 1046132817Stjr 1047304284Sachestatic void 1048304284Sachedummy_sighandler(int sig __unused) 1049304284Sache{ 1050304284Sache /* Nothing to do. 
*/ 1051304284Sache} 1052304284Sache 1053304284Sacheint 1054304284Sachemain(int argc, char *argv[]) 105574921Speter{ 1056132817Stjr const char *pidfile; 1057132817Stjr pid_t otherpid; 1058132817Stjr bool foreground; 105974921Speter int debuglevel; 106074921Speter sigset_t mask; 10611573Srgrimes 10621573Srgrimes foreground = false; 1063304284Sache debuglevel = 0; 1064304284Sache pidfile = HASTD_PIDFILE; 1065304284Sache 1066304284Sache for (;;) { 1067304284Sache int ch; 1068304284Sache 1069304284Sache ch = getopt(argc, argv, "c:dFhP:"); 1070304284Sache if (ch == -1) 1071304284Sache break; 1072304284Sache switch (ch) { 1073304284Sache case 'c': 1074304284Sache cfgpath = optarg; 1075304284Sache break; 1076304284Sache case 'd': 1077304284Sache debuglevel++; 1078304284Sache break; 1079304284Sache case 'F': 1080304284Sache foreground = true; 1081304284Sache break; 1082304284Sache case 'P': 1083304284Sache pidfile = optarg; 1084304284Sache break; 1085304284Sache case 'h': 10861573Srgrimes default: 10878870Srgrimes usage(); 1088159294Sdelphij } 10891573Srgrimes } 109090045Sobrien argc -= optind; 10911573Srgrimes argv += optind; 1092304284Sache 1093304284Sache pjdlog_init(PJDLOG_MODE_STD); 1094304284Sache pjdlog_debug_set(debuglevel); 1095304284Sache 1096304284Sache g_gate_load(); 1097304284Sache 1098304284Sache pfh = pidfile_open(pidfile, 0600, &otherpid); 1099304284Sache if (pfh == NULL) { 1100304284Sache if (errno == EEXIST) { 1101304284Sache pjdlog_exitx(EX_TEMPFAIL, 1102304284Sache "Another hastd is already running, pid: %jd.", 1103304284Sache (intmax_t)otherpid); 11041573Srgrimes } 11051573Srgrimes /* If we cannot create pidfile from other reasons, only warn. 
*/ 1106 pjdlog_errno(LOG_WARNING, "Unable to open or create pidfile"); 1107 } 1108 1109 cfg = yy_config_parse(cfgpath, true); 1110 PJDLOG_ASSERT(cfg != NULL); 1111 1112 /* 1113 * Restore default actions for interesting signals in case parent 1114 * process (like init(8)) decided to ignore some of them (like SIGHUP). 1115 */ 1116 PJDLOG_VERIFY(signal(SIGHUP, SIG_DFL) != SIG_ERR); 1117 PJDLOG_VERIFY(signal(SIGINT, SIG_DFL) != SIG_ERR); 1118 PJDLOG_VERIFY(signal(SIGTERM, SIG_DFL) != SIG_ERR); 1119 /* 1120 * Because SIGCHLD is ignored by default, setup dummy handler for it, 1121 * so we can mask it. 1122 */ 1123 PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); 1124 1125 PJDLOG_VERIFY(sigemptyset(&mask) == 0); 1126 PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); 1127 PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); 1128 PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); 1129 PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); 1130 PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); 1131 1132 /* Listen on control address. */ 1133 if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) { 1134 KEEP_ERRNO((void)pidfile_remove(pfh)); 1135 pjdlog_exit(EX_OSERR, "Unable to listen on control address %s", 1136 cfg->hc_controladdr); 1137 } 1138 /* Listen for remote connections. */ 1139 if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) { 1140 KEEP_ERRNO((void)pidfile_remove(pfh)); 1141 pjdlog_exit(EX_OSERR, "Unable to listen on address %s", 1142 cfg->hc_listenaddr); 1143 } 1144 1145 if (!foreground) { 1146 if (daemon(0, 0) < 0) { 1147 KEEP_ERRNO((void)pidfile_remove(pfh)); 1148 pjdlog_exit(EX_OSERR, "Unable to daemonize"); 1149 } 1150 1151 /* Start logging to syslog. */ 1152 pjdlog_mode_set(PJDLOG_MODE_SYSLOG); 1153 1154 /* Write PID to a file. */ 1155 if (pidfile_write(pfh) < 0) { 1156 pjdlog_errno(LOG_WARNING, 1157 "Unable to write PID to a file"); 1158 } 1159 } 1160 1161 hook_init(); 1162 1163 main_loop(); 1164 1165 exit(0); 1166} 1167