/* control.c revision 219351 */
/*-
 * Copyright (c) 2009-2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Pawel Jakub Dawidek under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28204076Spjd */ 29204076Spjd 30204076Spjd#include <sys/cdefs.h> 31204076Spjd__FBSDID("$FreeBSD: head/sbin/hastd/control.c 219351 2011-03-06 22:56:14Z pjd $"); 32204076Spjd 33204076Spjd#include <sys/types.h> 34204076Spjd#include <sys/wait.h> 35204076Spjd 36204076Spjd#include <assert.h> 37204076Spjd#include <errno.h> 38204076Spjd#include <pthread.h> 39213003Spjd#include <signal.h> 40204076Spjd#include <stdio.h> 41204076Spjd#include <string.h> 42213006Spjd#include <unistd.h> 43204076Spjd 44204076Spjd#include "hast.h" 45204076Spjd#include "hastd.h" 46219351Spjd#include "hast_checksum.h" 47204076Spjd#include "hast_proto.h" 48211978Spjd#include "hooks.h" 49204076Spjd#include "nv.h" 50204076Spjd#include "pjdlog.h" 51204076Spjd#include "proto.h" 52204076Spjd#include "subr.h" 53204076Spjd 54204076Spjd#include "control.h" 55204076Spjd 56213006Spjdvoid 57213006Spjdchild_cleanup(struct hast_resource *res) 58213006Spjd{ 59213006Spjd 60213006Spjd proto_close(res->hr_ctrl); 61213006Spjd res->hr_ctrl = NULL; 62213579Spjd if (res->hr_event != NULL) { 63213579Spjd proto_close(res->hr_event); 64213579Spjd res->hr_event = NULL; 65213579Spjd } 66218218Spjd if (res->hr_conn != NULL) { 67218218Spjd proto_close(res->hr_conn); 68218218Spjd res->hr_conn = NULL; 69218218Spjd } 70213006Spjd res->hr_workerpid = 0; 71213006Spjd} 72213006Spjd 73204076Spjdstatic void 74210882Spjdcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 75210882Spjd uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 76204076Spjd{ 77211978Spjd int oldrole; 78204076Spjd 79204076Spjd /* Name is always needed. 
*/ 80210882Spjd if (name != NULL) 81210882Spjd nv_add_string(nvout, name, "resource%u", no); 82204076Spjd 83204076Spjd if (res == NULL) { 84210882Spjd assert(cfg != NULL); 85210882Spjd assert(name != NULL); 86210882Spjd 87204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 88204076Spjd if (strcmp(res->hr_name, name) == 0) 89204076Spjd break; 90204076Spjd } 91204076Spjd if (res == NULL) { 92204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 93204076Spjd return; 94204076Spjd } 95204076Spjd } 96204076Spjd assert(res != NULL); 97204076Spjd 98204076Spjd /* Send previous role back. */ 99204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 100204076Spjd 101204076Spjd /* Nothing changed, return here. */ 102204076Spjd if (role == res->hr_role) 103204076Spjd return; 104204076Spjd 105204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 106204076Spjd pjdlog_info("Role changed to %s.", role2str(role)); 107204076Spjd 108204076Spjd /* Change role to the new one. */ 109211978Spjd oldrole = res->hr_role; 110204076Spjd res->hr_role = role; 111204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 112204076Spjd 113204076Spjd /* 114204076Spjd * If previous role was primary or secondary we have to kill process 115204076Spjd * doing that work. 
116204076Spjd */ 117204076Spjd if (res->hr_workerpid != 0) { 118204076Spjd if (kill(res->hr_workerpid, SIGTERM) < 0) { 119204076Spjd pjdlog_errno(LOG_WARNING, 120204076Spjd "Unable to kill worker process %u", 121204076Spjd (unsigned int)res->hr_workerpid); 122204076Spjd } else if (waitpid(res->hr_workerpid, NULL, 0) != 123204076Spjd res->hr_workerpid) { 124204076Spjd pjdlog_errno(LOG_WARNING, 125204076Spjd "Error while waiting for worker process %u", 126204076Spjd (unsigned int)res->hr_workerpid); 127204076Spjd } else { 128204076Spjd pjdlog_debug(1, "Worker process %u stopped.", 129204076Spjd (unsigned int)res->hr_workerpid); 130204076Spjd } 131213006Spjd child_cleanup(res); 132204076Spjd } 133204076Spjd 134204076Spjd /* Start worker process if we are changing to primary. */ 135204076Spjd if (role == HAST_ROLE_PRIMARY) 136204076Spjd hastd_primary(res); 137204076Spjd pjdlog_prefix_set("%s", ""); 138211978Spjd hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 139211978Spjd role2str(res->hr_role), NULL); 140204076Spjd} 141204076Spjd 142210882Spjdvoid 143210882Spjdcontrol_set_role(struct hast_resource *res, uint8_t role) 144210882Spjd{ 145210882Spjd 146210882Spjd control_set_role_common(NULL, NULL, role, res, NULL, 0); 147210882Spjd} 148210882Spjd 149204076Spjdstatic void 150204076Spjdcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 151204076Spjd unsigned int no) 152204076Spjd{ 153204076Spjd struct nv *cnvin, *cnvout; 154204076Spjd const char *str; 155204076Spjd int error; 156204076Spjd 157204076Spjd cnvin = cnvout = NULL; 158204076Spjd error = 0; 159204076Spjd 160204076Spjd /* 161204076Spjd * Prepare and send command to worker process. 
162204076Spjd */ 163204076Spjd cnvout = nv_alloc(); 164204076Spjd nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 165204076Spjd error = nv_error(cnvout); 166204076Spjd if (error != 0) { 167217737Spjd pjdlog_common(LOG_ERR, 0, error, 168217737Spjd "Unable to prepare control header"); 169204076Spjd goto end; 170204076Spjd } 171204076Spjd if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 172204076Spjd error = errno; 173217737Spjd pjdlog_errno(LOG_ERR, "Unable to send control header"); 174204076Spjd goto end; 175204076Spjd } 176204076Spjd 177204076Spjd /* 178204076Spjd * Receive response. 179204076Spjd */ 180204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 181204076Spjd error = errno; 182217737Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 183204076Spjd goto end; 184204076Spjd } 185204076Spjd 186217730Spjd error = nv_get_int16(cnvin, "error"); 187204076Spjd if (error != 0) 188204076Spjd goto end; 189204076Spjd 190204076Spjd if ((str = nv_get_string(cnvin, "status")) == NULL) { 191204076Spjd error = ENOENT; 192217737Spjd pjdlog_errno(LOG_ERR, "Field 'status' is missing."); 193204076Spjd goto end; 194204076Spjd } 195204076Spjd nv_add_string(nvout, str, "status%u", no); 196204076Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 197204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 198204076Spjd "extentsize%u", no); 199204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 200204076Spjd "keepdirty%u", no); 201204076Spjdend: 202204076Spjd if (cnvin != NULL) 203204076Spjd nv_free(cnvin); 204204076Spjd if (cnvout != NULL) 205204076Spjd nv_free(cnvout); 206204076Spjd if (error != 0) 207204076Spjd nv_add_int16(nvout, error, "error"); 208204076Spjd} 209204076Spjd 210204076Spjdstatic void 211204076Spjdcontrol_status(struct hastd_config *cfg, struct nv *nvout, 212204076Spjd struct hast_resource *res, const char *name, unsigned int no) 213204076Spjd{ 214204076Spjd 215204076Spjd assert(cfg 
!= NULL); 216204076Spjd assert(nvout != NULL); 217204076Spjd assert(name != NULL); 218204076Spjd 219204076Spjd /* Name is always needed. */ 220204076Spjd nv_add_string(nvout, name, "resource%u", no); 221204076Spjd 222204076Spjd if (res == NULL) { 223204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 224204076Spjd if (strcmp(res->hr_name, name) == 0) 225204076Spjd break; 226204076Spjd } 227204076Spjd if (res == NULL) { 228204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 229204076Spjd return; 230204076Spjd } 231204076Spjd } 232204076Spjd assert(res != NULL); 233204076Spjd nv_add_string(nvout, res->hr_provname, "provname%u", no); 234204076Spjd nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 235204076Spjd nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 236204076Spjd switch (res->hr_replication) { 237204076Spjd case HAST_REPLICATION_FULLSYNC: 238204076Spjd nv_add_string(nvout, "fullsync", "replication%u", no); 239204076Spjd break; 240204076Spjd case HAST_REPLICATION_MEMSYNC: 241204076Spjd nv_add_string(nvout, "memsync", "replication%u", no); 242204076Spjd break; 243204076Spjd case HAST_REPLICATION_ASYNC: 244204076Spjd nv_add_string(nvout, "async", "replication%u", no); 245204076Spjd break; 246204076Spjd default: 247204076Spjd nv_add_string(nvout, "unknown", "replication%u", no); 248204076Spjd break; 249204076Spjd } 250219351Spjd nv_add_string(nvout, checksum_name(res->hr_checksum), 251219351Spjd "checksum%u", no); 252204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 253204076Spjd 254204076Spjd switch (res->hr_role) { 255204076Spjd case HAST_ROLE_PRIMARY: 256204076Spjd assert(res->hr_workerpid != 0); 257204076Spjd /* FALLTHROUGH */ 258204076Spjd case HAST_ROLE_SECONDARY: 259204076Spjd if (res->hr_workerpid != 0) 260204076Spjd break; 261204076Spjd /* FALLTHROUGH */ 262204076Spjd default: 263204076Spjd return; 264204076Spjd } 265204076Spjd 266204076Spjd /* 267204076Spjd * If we are here, it means that we 
have a worker process, which we 268204076Spjd * want to ask some questions. 269204076Spjd */ 270204076Spjd control_status_worker(res, nvout, no); 271204076Spjd} 272204076Spjd 273204076Spjdvoid 274204076Spjdcontrol_handle(struct hastd_config *cfg) 275204076Spjd{ 276204076Spjd struct proto_conn *conn; 277204076Spjd struct nv *nvin, *nvout; 278204076Spjd unsigned int ii; 279204076Spjd const char *str; 280204076Spjd uint8_t cmd, role; 281204076Spjd int error; 282204076Spjd 283204076Spjd if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 284204076Spjd pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 285204076Spjd return; 286204076Spjd } 287204076Spjd 288217969Spjd cfg->hc_controlin = conn; 289204076Spjd nvin = nvout = NULL; 290204076Spjd role = HAST_ROLE_UNDEF; 291204076Spjd 292204076Spjd if (hast_proto_recv_hdr(conn, &nvin) < 0) { 293204076Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 294204076Spjd nvin = NULL; 295204076Spjd goto close; 296204076Spjd } 297204076Spjd 298204076Spjd /* Obtain command code. 0 means that nv_get_uint8() failed. */ 299204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 300204076Spjd if (cmd == 0) { 301204076Spjd pjdlog_error("Control header is missing 'cmd' field."); 302204076Spjd error = EHAST_INVALID; 303204076Spjd goto close; 304204076Spjd } 305204076Spjd 306204076Spjd /* Allocate outgoing nv structure. 
*/ 307204076Spjd nvout = nv_alloc(); 308204076Spjd if (nvout == NULL) { 309204076Spjd pjdlog_error("Unable to allocate header for control response."); 310204076Spjd error = EHAST_NOMEMORY; 311204076Spjd goto close; 312204076Spjd } 313204076Spjd 314204076Spjd error = 0; 315204076Spjd 316204076Spjd str = nv_get_string(nvin, "resource0"); 317204076Spjd if (str == NULL) { 318204076Spjd pjdlog_error("Control header is missing 'resource0' field."); 319204076Spjd error = EHAST_INVALID; 320204076Spjd goto fail; 321204076Spjd } 322204076Spjd if (cmd == HASTCTL_SET_ROLE) { 323204076Spjd role = nv_get_uint8(nvin, "role"); 324204076Spjd switch (role) { 325204076Spjd case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 326204076Spjd case HAST_ROLE_PRIMARY: 327204076Spjd case HAST_ROLE_SECONDARY: 328204076Spjd break; 329204076Spjd default: 330204076Spjd pjdlog_error("Invalid role received (%hhu).", role); 331204076Spjd error = EHAST_INVALID; 332204076Spjd goto fail; 333204076Spjd } 334204076Spjd } 335204076Spjd if (strcmp(str, "all") == 0) { 336204076Spjd struct hast_resource *res; 337204076Spjd 338204076Spjd /* All configured resources. */ 339204076Spjd 340204076Spjd ii = 0; 341204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 342204076Spjd switch (cmd) { 343204076Spjd case HASTCTL_SET_ROLE: 344210882Spjd control_set_role_common(cfg, nvout, role, res, 345204076Spjd res->hr_name, ii++); 346204076Spjd break; 347204076Spjd case HASTCTL_STATUS: 348204076Spjd control_status(cfg, nvout, res, res->hr_name, 349204076Spjd ii++); 350204076Spjd break; 351204076Spjd default: 352204076Spjd pjdlog_error("Invalid command received (%hhu).", 353204076Spjd cmd); 354204076Spjd error = EHAST_UNIMPLEMENTED; 355204076Spjd goto fail; 356204076Spjd } 357204076Spjd } 358204076Spjd } else { 359204076Spjd /* Only selected resources. 
*/ 360204076Spjd 361204076Spjd for (ii = 0; ; ii++) { 362204076Spjd str = nv_get_string(nvin, "resource%u", ii); 363204076Spjd if (str == NULL) 364204076Spjd break; 365204076Spjd switch (cmd) { 366204076Spjd case HASTCTL_SET_ROLE: 367210882Spjd control_set_role_common(cfg, nvout, role, NULL, 368210882Spjd str, ii); 369204076Spjd break; 370204076Spjd case HASTCTL_STATUS: 371204076Spjd control_status(cfg, nvout, NULL, str, ii); 372204076Spjd break; 373204076Spjd default: 374204076Spjd pjdlog_error("Invalid command received (%hhu).", 375204076Spjd cmd); 376204076Spjd error = EHAST_UNIMPLEMENTED; 377204076Spjd goto fail; 378204076Spjd } 379204076Spjd } 380204076Spjd } 381204076Spjd if (nv_error(nvout) != 0) 382204076Spjd goto close; 383204076Spjdfail: 384204076Spjd if (error != 0) 385204076Spjd nv_add_int16(nvout, error, "error"); 386204076Spjd 387204076Spjd if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 388204076Spjd pjdlog_errno(LOG_ERR, "Unable to send control response"); 389204076Spjdclose: 390204076Spjd if (nvin != NULL) 391204076Spjd nv_free(nvin); 392204076Spjd if (nvout != NULL) 393204076Spjd nv_free(nvout); 394204076Spjd proto_close(conn); 395217969Spjd cfg->hc_controlin = NULL; 396204076Spjd} 397204076Spjd 398204076Spjd/* 399204076Spjd * Thread handles control requests from the parent. 
400204076Spjd */ 401204076Spjdvoid * 402204076Spjdctrl_thread(void *arg) 403204076Spjd{ 404204076Spjd struct hast_resource *res = arg; 405204076Spjd struct nv *nvin, *nvout; 406204076Spjd uint8_t cmd; 407204076Spjd 408204076Spjd for (;;) { 409204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 410204076Spjd if (sigexit_received) 411204076Spjd pthread_exit(NULL); 412204076Spjd pjdlog_errno(LOG_ERR, 413204076Spjd "Unable to receive control message"); 414213004Spjd kill(getpid(), SIGTERM); 415213004Spjd pthread_exit(NULL); 416204076Spjd } 417204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 418204076Spjd if (cmd == 0) { 419204076Spjd pjdlog_error("Control message is missing 'cmd' field."); 420204076Spjd nv_free(nvin); 421204076Spjd continue; 422204076Spjd } 423204076Spjd nvout = nv_alloc(); 424204076Spjd switch (cmd) { 425204076Spjd case HASTCTL_STATUS: 426204076Spjd if (res->hr_remotein != NULL && 427204076Spjd res->hr_remoteout != NULL) { 428204076Spjd nv_add_string(nvout, "complete", "status"); 429204076Spjd } else { 430204076Spjd nv_add_string(nvout, "degraded", "status"); 431204076Spjd } 432204076Spjd nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 433204076Spjd "extentsize"); 434204076Spjd if (res->hr_role == HAST_ROLE_PRIMARY) { 435204076Spjd nv_add_uint32(nvout, 436204076Spjd (uint32_t)res->hr_keepdirty, "keepdirty"); 437204076Spjd nv_add_uint64(nvout, 438204076Spjd (uint64_t)(activemap_ndirty(res->hr_amp) * 439204076Spjd res->hr_extentsize), "dirty"); 440204076Spjd } else { 441204076Spjd nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 442204076Spjd nv_add_uint64(nvout, (uint64_t)0, "dirty"); 443204076Spjd } 444217784Spjd nv_add_int16(nvout, 0, "error"); 445204076Spjd break; 446217784Spjd case HASTCTL_RELOAD: 447217784Spjd /* 448217784Spjd * When parent receives SIGHUP and discovers that 449217784Spjd * something related to us has changes, it sends reload 450217784Spjd * message to us. 
451217784Spjd */ 452217784Spjd assert(res->hr_role == HAST_ROLE_PRIMARY); 453217784Spjd primary_config_reload(res, nvin); 454217784Spjd nv_add_int16(nvout, 0, "error"); 455217784Spjd break; 456204076Spjd default: 457204076Spjd nv_add_int16(nvout, EINVAL, "error"); 458204076Spjd break; 459204076Spjd } 460217784Spjd nv_free(nvin); 461204076Spjd if (nv_error(nvout) != 0) { 462204076Spjd pjdlog_error("Unable to create answer on control message."); 463204076Spjd nv_free(nvout); 464204076Spjd continue; 465204076Spjd } 466204076Spjd if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 467204076Spjd pjdlog_errno(LOG_ERR, 468204076Spjd "Unable to send reply to control message"); 469204076Spjd } 470204076Spjd nv_free(nvout); 471204076Spjd } 472204076Spjd /* NOTREACHED */ 473204076Spjd return (NULL); 474204076Spjd} 475